tellaro-query-language 0.2.1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/PKG-INFO +2 -1
  2. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/pyproject.toml +21 -1
  3. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/__init__.py +1 -1
  4. tellaro_query_language-0.2.3/src/tql/cache/base.py +97 -0
  5. tellaro_query_language-0.2.3/src/tql/cache/memory.py +171 -0
  6. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core.py +19 -2
  7. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core_components/opensearch_operations.py +23 -4
  8. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/evaluator.py +2 -0
  9. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/evaluator_components/value_comparison.py +74 -12
  10. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutator_analyzer.py +2 -2
  11. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch_mappings.py +2 -2
  12. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch_stats.py +3 -1
  13. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/parser.py +52 -21
  14. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/post_processor.py +39 -10
  15. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/scripts.py +19 -2
  16. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/stats_evaluator.py +5 -3
  17. tellaro_query_language-0.2.1/src/tql/cache/base.py +0 -25
  18. tellaro_query_language-0.2.1/src/tql/cache/memory.py +0 -63
  19. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/LICENSE +0 -0
  20. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/README.md +0 -0
  21. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/analyzer.py +0 -0
  22. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/cache/__init__.py +0 -0
  23. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/cache/redis.py +0 -0
  24. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core_components/README.md +0 -0
  25. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core_components/__init__.py +0 -0
  26. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core_components/file_operations.py +0 -0
  27. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core_components/stats_operations.py +0 -0
  28. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core_components/validation_operations.py +0 -0
  29. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/evaluator_components/README.md +0 -0
  30. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/evaluator_components/__init__.py +0 -0
  31. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/evaluator_components/field_access.py +0 -0
  32. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/evaluator_components/special_expressions.py +0 -0
  33. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/exceptions.py +0 -0
  34. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/geoip_normalizer.py +0 -0
  35. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/__init__.py +0 -0
  36. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/base.py +0 -0
  37. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/dns.py +0 -0
  38. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/encoding.py +0 -0
  39. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/geo.py +0 -0
  40. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/list.py +0 -0
  41. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/network.py +0 -0
  42. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/security.py +0 -0
  43. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/string.py +0 -0
  44. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch.py +0 -0
  45. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch_components/README.md +0 -0
  46. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch_components/__init__.py +0 -0
  47. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch_components/field_mapping.py +0 -0
  48. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch_components/lucene_converter.py +0 -0
  49. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch_components/query_converter.py +0 -0
  50. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/parser_components/README.md +0 -0
  51. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/parser_components/__init__.py +0 -0
  52. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/parser_components/ast_builder.py +0 -0
  53. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/parser_components/error_analyzer.py +0 -0
  54. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/parser_components/field_extractor.py +0 -0
  55. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/parser_components/grammar.py +0 -0
  56. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/stats_transformer.py +0 -0
  57. {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/validators.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tellaro-query-language
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: A flexible, human-friendly query language for searching and filtering structured data
5
5
  Home-page: https://github.com/tellaro/tellaro-query-language
6
6
  License: MIT
@@ -24,6 +24,7 @@ Requires-Dist: opensearch-dsl (>=2.1.0,<3.0.0) ; extra == "opensearch"
24
24
  Requires-Dist: opensearch-py (>=2.4.2,<3.0.0) ; extra == "opensearch"
25
25
  Requires-Dist: pyparsing (>=3.2.1,<4.0.0)
26
26
  Requires-Dist: setuptools (>=80.0.0,<81.0.0)
27
+ Requires-Dist: urllib3 (>=2.5.0,<3.0.0)
27
28
  Project-URL: Documentation, https://github.com/tellaro/tellaro-query-language/tree/main/docs
28
29
  Project-URL: Repository, https://github.com/tellaro/tellaro-query-language
29
30
  Description-Content-Type: text/markdown
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "tellaro-query-language"
3
- version = "0.2.1"
3
+ version = "0.2.3"
4
4
  description = "A flexible, human-friendly query language for searching and filtering structured data"
5
5
  authors = ["Justin Henderson <justin@tellaro.io>"]
6
6
  license = "MIT"
@@ -39,6 +39,7 @@ dnspython = "^2.7.0"
39
39
  opensearch-py = {version = "^2.4.2", optional = true}
40
40
  opensearch-dsl = {version = "^2.1.0", optional = true}
41
41
  maxminddb = "^2.7.0"
42
+ urllib3 = "^2.5.0"
42
43
 
43
44
  [tool.poetry.extras]
44
45
  opensearch = ["opensearch-py", "opensearch-dsl"]
@@ -94,3 +95,22 @@ line_length = 120
94
95
  filterwarnings = [
95
96
  "ignore:Unverified HTTPS request:urllib3.exceptions.InsecureRequestWarning",
96
97
  ]
98
+ # Only collect tests from these directories
99
+ testpaths = [
100
+ "tests",
101
+ ]
102
+ # Don't collect from these directories
103
+ norecursedirs = [
104
+ ".*",
105
+ "build",
106
+ "dist",
107
+ "*.egg",
108
+ "venv",
109
+ "env",
110
+ "__pycache__",
111
+ "playground",
112
+ ]
113
+ # Custom markers
114
+ markers = [
115
+ "integration: marks tests as integration tests (deselect with '-m \"not integration\"')",
116
+ ]
@@ -28,7 +28,7 @@ from .opensearch_mappings import (
28
28
  get_sample_data_from_index,
29
29
  )
30
30
 
31
- __version__ = "0.1.0"
31
+ __version__ = "0.2.2"
32
32
  __all__ = [
33
33
  "TQL",
34
34
  "TQLParseError",
@@ -0,0 +1,97 @@
1
+ """Base cache infrastructure for TQL.
2
+
3
+ This module provides the base CacheManager class that defines the caching
4
+ interface used throughout TQL. Concrete implementations include LocalCacheManager
5
+ for in-memory caching and RedisCacheManager for distributed caching.
6
+ """
7
+
8
+ from typing import Any, Dict, Optional
9
+
10
+
11
class CacheManager:
    """Base class for cache management.

    This class defines the interface for all cache implementations in TQL.
    Subclasses should override these methods to provide actual caching
    functionality.

    The base implementation provides no-op defaults that can be safely used
    when caching is disabled or not needed: nothing is ever stored, every
    lookup misses, and the statistics are empty.

    Example (using a concrete subclass such as LocalCacheManager):
        >>> cache = LocalCacheManager()
        >>> cache.set("user:123", {"name": "Alice", "age": 30}, ttl=3600)
        >>> user = cache.get("user:123")
        >>> cache.delete("user:123")
    """

    def get(self, key: str) -> Optional[Any]:
        """Retrieve a value from the cache.

        Args:
            key: The cache key to look up. Should be a string identifier.

        Returns:
            The cached value if it exists and hasn't expired, None otherwise.
            The base implementation always returns None.

        Example:
            >>> value = cache.get("my_key")
            >>> if value is not None:
            ...     print(f"Found: {value}")
        """
        return None

    def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
        """Store a value in the cache.

        The base implementation discards the value.

        Args:
            key: The cache key under which to store the value.
            value: The value to cache. Can be any Python object.
            ttl: Time-to-live in seconds. ``None`` defers to the
                implementation's default (for example, LocalCacheManager
                applies its ``default_ttl``); ``0`` requests an entry that
                never expires.

        Example:
            >>> cache.set("config", {"debug": True}, ttl=300)  # Cache for 5 minutes
            >>> cache.set("permanent", {"version": "1.0"}, ttl=0)  # Never expires
        """

    def delete(self, key: str) -> None:
        """Remove a value from the cache.

        The base implementation does nothing.

        Args:
            key: The cache key to delete.

        Example:
            >>> cache.delete("expired_key")
        """

    def clear_pattern(self, pattern: str) -> int:  # pylint: disable=unused-argument
        """Clear all keys matching a pattern.

        Args:
            pattern: A pattern string to match keys. Format depends on implementation.
                For Redis: supports wildcards like "user:*" or "session:?123"
                For Local: glob patterns ("*", "?", "[seq]", "[!seq]")

        Returns:
            The number of keys that were deleted. The base implementation
            always returns 0.

        Example:
            >>> count = cache.clear_pattern("temp:*")
            >>> print(f"Cleared {count} temporary keys")
        """
        return 0

    def get_stats(self) -> Dict[str, Any]:
        """Get cache statistics and metrics.

        Returns:
            Dictionary of implementation-defined statistics. The base
            implementation returns an empty dict, so callers should read
            keys with ``.get``. LocalCacheManager, for example, reports:
                - hits: Number of successful lookups
                - misses: Number of failed lookups
                - hit_rate: Ratio of hits to total lookups (0.0 to 1.0)
                - size: Number of items in cache
                - max_size: Maximum cache capacity

        Example:
            >>> stats = cache.get_stats()
            >>> print(f"Hit rate: {stats.get('hit_rate', 0.0):.2%}")
        """
        return {}
@@ -0,0 +1,171 @@
1
+ """In-memory cache implementation for TQL.
2
+
3
+ This module provides a simple in-memory cache with TTL (time-to-live) support
4
+ and basic LRU (Least Recently Used) eviction when the cache reaches its size limit.
5
+ """
6
+
7
+ import time
8
+ from typing import Any, Dict, Optional
9
+
10
+ from .base import CacheManager
11
+
12
+
13
class LocalCacheManager(CacheManager):
    """Local in-memory cache with TTL and LRU eviction.

    Entries are kept in a plain dict whose insertion order doubles as the
    recency order: the first key is the least recently used one. Successful
    reads and all writes move the touched key to the end, so eviction always
    removes the entry that has gone longest without being accessed.

    No locking is performed, so an instance shared between threads must be
    guarded by the caller. For distributed caching across multiple processes
    or servers, use RedisCacheManager instead.

    Features:
    - TTL-based expiration (expired entries are dropped lazily on lookup)
    - LRU eviction when cache is full
    - Hit/miss statistics tracking
    - Pattern-based key clearing

    Args:
        max_size: Maximum number of items to store (default: 10000)
        default_ttl: Default time-to-live in seconds (default: 3600 = 1 hour)

    Example:
        >>> cache = LocalCacheManager(max_size=1000, default_ttl=600)
        >>> cache.set("user:123", {"name": "Alice"}, ttl=300)
        >>> user = cache.get("user:123")
        >>> stats = cache.get_stats()
        >>> print(f"Hit rate: {stats['hit_rate']:.2%}")

    Attributes:
        max_size: Maximum cache size
        default_ttl: Default TTL for cached items
    """

    def __init__(self, max_size: int = 10000, default_ttl: int = 3600):
        """Initialize the local cache.

        Args:
            max_size: Maximum number of items to cache before eviction starts.
                A value of 0 or less disables storage entirely.
            default_ttl: Default expiration time in seconds for cached items.
        """
        self.max_size = max_size
        self.default_ttl = default_ttl
        # Insertion order of _cache is the LRU order (oldest first).
        self._cache: Dict[str, Any] = {}
        # Absolute expiry timestamps; 0 marks an entry that never expires.
        self._expiry: Dict[str, float] = {}
        self._hits = 0
        self._misses = 0

    def get(self, key: str) -> Optional[Any]:
        """Retrieve value from cache if not expired.

        Args:
            key: The cache key to retrieve.

        Returns:
            The cached value if present and not expired, None otherwise.

        Note:
            This method automatically removes expired keys when accessed.
            Hit/miss statistics are updated on each call, and a hit marks
            the key as most recently used.
        """
        if key in self._cache:
            expiry = self._expiry.get(key, float("inf"))
            if expiry == 0 or expiry > time.time():
                self._hits += 1
                # Re-insert so the key moves to the most-recently-used end.
                value = self._cache.pop(key)
                self._cache[key] = value
                return value
            # Expired - clean up and fall through to the miss path.
            del self._cache[key]
            self._expiry.pop(key, None)
        self._misses += 1
        return None

    def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
        """Store value in cache with optional TTL.

        Args:
            key: The cache key under which to store the value.
            value: The value to cache (any Python object).
            ttl: Time-to-live in seconds. If None, uses default_ttl.
                If 0, the item never expires.

        Note:
            When the cache is full (reaches max_size), the least recently
            used item is evicted to make room for the new one. Overwriting
            an existing key never evicts and marks that key as most
            recently used. With max_size <= 0 nothing is stored.
        """
        if self.max_size <= 0:
            # A zero-capacity cache has nothing to evict; storing is a no-op
            # rather than an error.
            return

        if key in self._cache:
            # Drop the old slot so the re-insert below refreshes recency.
            del self._cache[key]
        else:
            # Loop (rather than evict once) so the cache also shrinks back
            # if max_size was lowered after it had filled up.
            while len(self._cache) >= self.max_size:
                oldest_key = next(iter(self._cache))
                del self._cache[oldest_key]
                self._expiry.pop(oldest_key, None)

        self._cache[key] = value
        if ttl == 0:
            # Never expires
            self._expiry[key] = 0.0
        else:
            lifetime = ttl if ttl is not None else self.default_ttl
            self._expiry[key] = time.time() + lifetime

    def delete(self, key: str) -> None:
        """Remove value from cache.

        Args:
            key: The cache key to delete.

        Note:
            If the key doesn't exist, this method does nothing (no error raised).
        """
        self._cache.pop(key, None)
        self._expiry.pop(key, None)

    def clear_pattern(self, pattern: str) -> int:
        """Clear all keys matching a glob pattern.

        Matching is case-sensitive on every platform.

        Args:
            pattern: A glob pattern to match keys. Supports wildcards:
                - '*' matches any sequence of characters
                - '?' matches any single character
                - '[seq]' matches any character in seq
                - '[!seq]' matches any character not in seq

        Returns:
            The number of keys that were deleted.

        Example:
            >>> cache.set("user:123", data1)
            >>> cache.set("user:456", data2)
            >>> cache.set("session:789", data3)
            >>> count = cache.clear_pattern("user:*")  # Deletes user:123 and user:456
            >>> print(count)  # 2
        """
        import fnmatch

        # fnmatchcase skips the OS-specific case/separator normalization that
        # fnmatch.fnmatch applies, so cache keys match the same way everywhere.
        # Collect first: the dict must not change size while being iterated.
        keys_to_delete = [k for k in self._cache if fnmatch.fnmatchcase(k, pattern)]
        for key in keys_to_delete:
            del self._cache[key]
            self._expiry.pop(key, None)
        return len(keys_to_delete)

    def get_stats(self) -> Dict[str, Any]:
        """Get cache performance statistics.

        Returns:
            Dictionary containing:
                - hits: Number of successful cache retrievals
                - misses: Number of cache misses
                - hit_rate: Ratio of hits to total requests (0.0 to 1.0)
                - size: Current number of items in cache
                - max_size: Maximum cache capacity

        Example:
            >>> stats = cache.get_stats()
            >>> print(f"Cache is {stats['hit_rate']:.2%} effective")
            >>> print(f"Using {stats['size']}/{stats['max_size']} slots")
        """
        total_requests = self._hits + self._misses
        return {
            "hits": self._hits,
            "misses": self._misses,
            # Guard the division: no lookups yet means a 0.0 hit rate.
            "hit_rate": self._hits / total_requests if total_requests > 0 else 0.0,
            "size": len(self._cache),
            "max_size": self.max_size,
        }
@@ -100,8 +100,25 @@ class TQL:
100
100
  # This is an OpenSearch-style mapping, map field to itself
101
101
  self._simple_mappings[k] = k
102
102
  else:
103
- # Extract the first key as the simple mapping
104
- self._simple_mappings[k] = next(iter(v.keys()))
103
+ # Intelligent field mapping extraction for complex mappings
104
+ # Priority: 1) Key matching field name, 2) Key without dots (primary field), 3) First key
105
+
106
+ if k in v:
107
+ # Field name exists as key in mapping (e.g., {"username": {"username": "keyword", ...}})
108
+ self._simple_mappings[k] = k
109
+ else:
110
+ # Find primary field (keys without dots, not starting with underscore)
111
+ primary_fields = [
112
+ field_key for field_key in v.keys()
113
+ if '.' not in field_key and not field_key.startswith('_')
114
+ ]
115
+
116
+ if primary_fields:
117
+ # Use first primary field
118
+ self._simple_mappings[k] = primary_fields[0]
119
+ else:
120
+ # Fallback to first key (maintain backward compatibility)
121
+ self._simple_mappings[k] = next(iter(v.keys()))
105
122
  else:
106
123
  # Default to mapping field to itself
107
124
  self._simple_mappings[k] = k
@@ -239,7 +239,7 @@ class OpenSearchOperations:
239
239
  analysis_result = self.analyze_opensearch_query(query)
240
240
  has_mutators = isinstance(analysis_result, MutatorAnalysisResult)
241
241
  needs_post_processing_for_stats = (
242
- has_mutators and bool(analysis_result.post_processing_requirements) if has_mutators else False
242
+ has_mutators and bool(analysis_result.post_processing_requirements) if has_mutators else False # type: ignore[union-attr]
243
243
  )
244
244
 
245
245
  # Handle stats queries differently
@@ -258,7 +258,7 @@ class OpenSearchOperations:
258
258
  if filter_ast:
259
259
  # Use the optimized AST if we have mutators
260
260
  if has_mutators and needs_post_processing_for_stats:
261
- filter_query = backend.convert(analysis_result.optimized_ast.get("filter", filter_ast))["query"]
261
+ filter_query = backend.convert(analysis_result.optimized_ast.get("filter", filter_ast))["query"] # type: ignore[union-attr]
262
262
  else:
263
263
  filter_query = backend.convert(filter_ast)["query"]
264
264
  else:
@@ -529,6 +529,8 @@ class OpenSearchOperations:
529
529
  stats_evaluator = TQLStatsEvaluator()
530
530
 
531
531
  # Execute the stats aggregation in memory
532
+ if stats_ast_for_post_processing is None:
533
+ raise ValueError("Stats AST is None but phase2 processing was requested")
532
534
  stats_results = stats_evaluator.evaluate_stats(filtered_docs, stats_ast_for_post_processing, {})
533
535
 
534
536
  # Format response for stats-only (no documents)
@@ -547,7 +549,7 @@ class OpenSearchOperations:
547
549
  "performance_impact": {
548
550
  "overhead_ms": 0, # Would need timing to calculate
549
551
  "documents_processed": len(all_documents),
550
- "mutators_applied": len(analysis_result.post_processing_requirements) if has_mutators else 0,
552
+ "mutators_applied": len(analysis_result.post_processing_requirements) if has_mutators else 0, # type: ignore[union-attr]
551
553
  },
552
554
  "opensearch_query": complete_opensearch_query,
553
555
  }
@@ -580,6 +582,8 @@ class OpenSearchOperations:
580
582
  translator = OpenSearchStatsTranslator()
581
583
 
582
584
  # Transform the response using the translator
585
+ if stats_ast is None:
586
+ raise ValueError("Stats AST is None but grouping was detected")
583
587
  transformed_response = translator.transform_response(response, stats_ast)
584
588
 
585
589
  # The transformed response already has the correct structure
@@ -925,6 +929,21 @@ class OpenSearchOperations:
925
929
  # Get opensearch total before filtering
926
930
  opensearch_total = total_hits
927
931
 
932
+ # Track optimization features used in this query
933
+ optimizations_applied = []
934
+ if scan_all:
935
+ optimizations_applied.append("scroll_api")
936
+ if needs_phase2 and pagination_stats and pagination_stats.get("pages_checked", 0) > 1:
937
+ optimizations_applied.append("auto_pagination")
938
+ if request_cache:
939
+ optimizations_applied.append("request_cache")
940
+ if preference:
941
+ optimizations_applied.append("preference_routing")
942
+ if routing:
943
+ optimizations_applied.append("custom_routing")
944
+ if terminate_after:
945
+ optimizations_applied.append("early_termination")
946
+
928
947
  result = {
929
948
  "results": results,
930
949
  "total": len(results),
@@ -934,7 +953,7 @@ class OpenSearchOperations:
934
953
  "health_status": health_status,
935
954
  "health_reasons": health_reasons,
936
955
  "performance_impact": performance_impact,
937
- "optimizations_applied": [], # TODO: Track actual optimizations # noqa: W0511
956
+ "optimizations_applied": optimizations_applied,
938
957
  "opensearch_query": (
939
958
  complete_opensearch_query if "complete_opensearch_query" in locals() else {}
940
959
  ), # Include the full query body
@@ -350,6 +350,8 @@ class TQLEvaluator:
350
350
  return left_missing or right_missing
351
351
  elif node_type == "unary_op":
352
352
  # Don't recurse through NOT operators - they handle missing fields themselves
353
+ # The NOT operator has special logic at lines 213-254 that handles missing fields correctly
354
+ # Recursing here would cause double-handling and incorrect results
353
355
  return False
354
356
  elif node_type == "collection_op":
355
357
  field_name = node["field"]
@@ -6,6 +6,7 @@ operator implementations, and special cases like CIDR matching.
6
6
 
7
7
  import ipaddress
8
8
  import re
9
+ from functools import lru_cache
9
10
  from typing import Any
10
11
 
11
12
 
@@ -15,6 +16,23 @@ class ValueComparator:
15
16
  # Sentinel value to distinguish missing fields from None values
16
17
  _MISSING_FIELD = object()
17
18
 
19
+ @staticmethod
20
+ @lru_cache(maxsize=256)
21
+ def _compile_regex(pattern: str) -> re.Pattern:
22
+ """Compile and cache regex patterns for performance.
23
+
24
+ Args:
25
+ pattern: Regex pattern string
26
+
27
+ Returns:
28
+ Compiled regex pattern
29
+
30
+ Note:
31
+ Uses LRU cache with max 256 patterns. This significantly improves
32
+ performance when the same regex patterns are used repeatedly in queries.
33
+ """
34
+ return re.compile(pattern)
35
+
18
36
  def compare_values(self, field_value: Any, operator: str, expected_value: Any) -> bool: # noqa: C901
19
37
  """Compare a field value against an expected value using the given operator.
20
38
 
@@ -49,9 +67,17 @@ class ValueComparator:
49
67
  return False
50
68
 
51
69
  # Handle None field values (field exists but is None)
70
+ # IMPORTANT: None is a valid value, distinct from missing fields.
71
+ # For 'exists' operator: This code path should NOT be reached because 'exists'
72
+ # checks field presence in the record, not the value. The evaluator handles
73
+ # 'exists' before calling compare_values. If we reach here with None, it means
74
+ # the field exists but has None value, which should NOT match 'exists'.
52
75
  if field_value is None:
53
76
  if operator in ["exists"]:
54
- return True # Field exists, even if value is None
77
+ # Field key exists in record but value is None
78
+ # Semantics: 'exists' means "field has a non-null value"
79
+ # This matches database behavior where NULL != EXISTS
80
+ return False # None value does not satisfy 'exists'
55
81
  elif operator in ["is"]:
56
82
  # Check for null comparison - expected_value can be None or "null"
57
83
  return expected_value is None or (isinstance(expected_value, str) and expected_value.lower() == "null")
@@ -59,6 +85,10 @@ class ValueComparator:
59
85
  return False
60
86
 
61
87
  # Convert numeric strings to numbers for comparison
88
+ # IMPORTANT: Store original values to check if conversion succeeded
89
+ field_value_original = field_value
90
+ expected_value_original = expected_value
91
+
62
92
  field_value = self._convert_numeric(field_value)
63
93
  expected_value = self._convert_numeric(expected_value)
64
94
 
@@ -68,6 +98,20 @@ class ValueComparator:
68
98
  if isinstance(field_value, str) and field_value.lower() in ["true", "false"]:
69
99
  field_value = field_value.lower() == "true"
70
100
 
101
+ # Type compatibility check for numeric operators
102
+ # If operator requires numeric comparison, both values must be numeric
103
+ # Exception: Arrays are handled specially in the operator logic below
104
+ if operator in ["gt", "gte", "lt", "lte", ">", ">=", "<", "<="]:
105
+ # Skip check if field_value is an array - handled by array logic below
106
+ if not isinstance(field_value, (list, tuple)):
107
+ field_is_numeric = isinstance(field_value, (int, float)) and not isinstance(field_value, bool)
108
+ expected_is_numeric = isinstance(expected_value, (int, float)) and not isinstance(expected_value, bool)
109
+
110
+ if not (field_is_numeric and expected_is_numeric):
111
+ # At least one value failed numeric conversion
112
+ # Cannot perform numeric comparison - return False
113
+ return False
114
+
71
115
  try:
72
116
  if operator in ["eq", "="]:
73
117
  # Handle array fields - check if ANY element equals expected value
@@ -104,27 +148,30 @@ class ValueComparator:
104
148
  if isinstance(expected_value, list) and len(expected_value) == 1:
105
149
  expected_value = expected_value[0]
106
150
  # Handle list fields by checking if ANY element contains the expected value
151
+ # Case-insensitive comparison to match post-processor behavior
107
152
  if isinstance(field_value, list):
108
153
  # For arrays, check if ANY element contains the expected value
109
- return any(str(expected_value) in str(elem) for elem in field_value)
154
+ return any(str(expected_value).lower() in str(elem).lower() for elem in field_value)
110
155
  else:
111
- return str(expected_value) in str(field_value)
156
+ return str(expected_value).lower() in str(field_value).lower()
112
157
  elif operator == "startswith":
113
158
  # Unwrap single-element lists for string operators
114
159
  if isinstance(expected_value, list) and len(expected_value) == 1:
115
160
  expected_value = expected_value[0]
116
161
  # Handle array fields - check if ANY element starts with expected value
162
+ # Case-insensitive comparison to match post-processor behavior
117
163
  if isinstance(field_value, (list, tuple)):
118
- return any(str(elem).startswith(str(expected_value)) for elem in field_value)
119
- return str(field_value).startswith(str(expected_value))
164
+ return any(str(elem).lower().startswith(str(expected_value).lower()) for elem in field_value)
165
+ return str(field_value).lower().startswith(str(expected_value).lower())
120
166
  elif operator == "endswith":
121
167
  # Unwrap single-element lists for string operators
122
168
  if isinstance(expected_value, list) and len(expected_value) == 1:
123
169
  expected_value = expected_value[0]
124
170
  # Handle array fields - check if ANY element ends with expected value
171
+ # Case-insensitive comparison to match post-processor behavior
125
172
  if isinstance(field_value, (list, tuple)):
126
- return any(str(elem).endswith(str(expected_value)) for elem in field_value)
127
- return str(field_value).endswith(str(expected_value))
173
+ return any(str(elem).lower().endswith(str(expected_value).lower()) for elem in field_value)
174
+ return str(field_value).lower().endswith(str(expected_value).lower())
128
175
  elif operator == "in":
129
176
  if isinstance(expected_value, list):
130
177
  if len(expected_value) == 1 and isinstance(field_value, list):
@@ -143,7 +190,13 @@ class ValueComparator:
143
190
  # Unwrap single-element lists for string operators
144
191
  if isinstance(expected_value, list) and len(expected_value) == 1:
145
192
  expected_value = expected_value[0]
146
- return bool(re.search(str(expected_value), str(field_value)))
193
+ # Use cached regex compilation for performance
194
+ try:
195
+ pattern = self._compile_regex(str(expected_value))
196
+ return bool(pattern.search(str(field_value)))
197
+ except (re.error, TypeError):
198
+ # Invalid regex pattern, fall back to no match
199
+ return False
147
200
  elif operator == "cidr":
148
201
  # Unwrap single-element lists for CIDR
149
202
  if isinstance(expected_value, list) and len(expected_value) == 1:
@@ -194,22 +247,31 @@ class ValueComparator:
194
247
  # Unwrap single-element lists for string operators
195
248
  if isinstance(expected_value, list) and len(expected_value) == 1:
196
249
  expected_value = expected_value[0]
197
- return str(expected_value) not in str(field_value)
250
+ # Case-insensitive comparison to match post-processor behavior
251
+ return str(expected_value).lower() not in str(field_value).lower()
198
252
  elif operator == "not_startswith":
199
253
  # Unwrap single-element lists for string operators
200
254
  if isinstance(expected_value, list) and len(expected_value) == 1:
201
255
  expected_value = expected_value[0]
202
- return not str(field_value).startswith(str(expected_value))
256
+ # Case-insensitive comparison to match post-processor behavior
257
+ return not str(field_value).lower().startswith(str(expected_value).lower())
203
258
  elif operator == "not_endswith":
204
259
  # Unwrap single-element lists for string operators
205
260
  if isinstance(expected_value, list) and len(expected_value) == 1:
206
261
  expected_value = expected_value[0]
207
- return not str(field_value).endswith(str(expected_value))
262
+ # Case-insensitive comparison to match post-processor behavior
263
+ return not str(field_value).lower().endswith(str(expected_value).lower())
208
264
  elif operator == "not_regexp":
209
265
  # Unwrap single-element lists for string operators
210
266
  if isinstance(expected_value, list) and len(expected_value) == 1:
211
267
  expected_value = expected_value[0]
212
- return not bool(re.search(str(expected_value), str(field_value)))
268
+ # Use cached regex compilation for performance
269
+ try:
270
+ pattern = self._compile_regex(str(expected_value))
271
+ return not bool(pattern.search(str(field_value)))
272
+ except (re.error, TypeError):
273
+ # Invalid regex pattern, fall back to match (not regexp succeeds)
274
+ return True
213
275
  elif operator == "not_cidr":
214
276
  # Unwrap single-element lists for CIDR
215
277
  if isinstance(expected_value, list) and len(expected_value) == 1:
@@ -491,7 +491,7 @@ class MutatorAnalyzer:
491
491
  for param_name, param_value in geo_params.items():
492
492
  mutator_params.append([param_name, param_value])
493
493
 
494
- geo_mutator = {"name": "geoip_lookup"}
494
+ geo_mutator: Dict[str, Any] = {"name": "geoip_lookup"}
495
495
  if mutator_params:
496
496
  geo_mutator["params"] = mutator_params
497
497
 
@@ -539,7 +539,7 @@ class MutatorAnalyzer:
539
539
  for param_name, param_value in nslookup_params.items():
540
540
  mutator_params.append([param_name, param_value])
541
541
 
542
- nslookup_mutator = {"name": "nslookup"}
542
+ nslookup_mutator: Dict[str, Any] = {"name": "nslookup"}
543
543
  if mutator_params:
544
544
  nslookup_mutator["params"] = mutator_params
545
545
 
@@ -52,7 +52,7 @@ def extract_field_mappings_from_opensearch(
52
52
  try:
53
53
  # Extract field names from the TQL query
54
54
  field_names = tql_instance.extract_fields(tql_query)
55
- logger.info(f"Extracted {len(field_names)} fields from TQL query: {field_names}")
55
+ logger.debug(f"Extracted {len(field_names)} fields from TQL query: {field_names}")
56
56
 
57
57
  if not field_names:
58
58
  logger.warning("No fields found in TQL query")
@@ -68,7 +68,7 @@ def extract_field_mappings_from_opensearch(
68
68
  # Extract and convert mappings to TQL format
69
69
  tql_mappings = _convert_opensearch_mappings_to_tql_format(mapping_response, field_names)
70
70
 
71
- logger.info(f"Successfully converted mappings for {len(tql_mappings)} fields")
71
+ logger.debug(f"Successfully converted mappings for {len(tql_mappings)} fields")
72
72
  return tql_mappings
73
73
 
74
74
  except Exception as e: