tellaro-query-language 0.2.1__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/PKG-INFO +2 -1
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/pyproject.toml +21 -1
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/__init__.py +1 -1
- tellaro_query_language-0.2.3/src/tql/cache/base.py +97 -0
- tellaro_query_language-0.2.3/src/tql/cache/memory.py +171 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core.py +19 -2
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core_components/opensearch_operations.py +23 -4
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/evaluator.py +2 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/evaluator_components/value_comparison.py +74 -12
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutator_analyzer.py +2 -2
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch_mappings.py +2 -2
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch_stats.py +3 -1
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/parser.py +52 -21
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/post_processor.py +39 -10
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/scripts.py +19 -2
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/stats_evaluator.py +5 -3
- tellaro_query_language-0.2.1/src/tql/cache/base.py +0 -25
- tellaro_query_language-0.2.1/src/tql/cache/memory.py +0 -63
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/LICENSE +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/README.md +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/analyzer.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/cache/__init__.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/cache/redis.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core_components/README.md +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core_components/__init__.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core_components/file_operations.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core_components/stats_operations.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core_components/validation_operations.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/evaluator_components/README.md +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/evaluator_components/__init__.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/evaluator_components/field_access.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/evaluator_components/special_expressions.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/exceptions.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/geoip_normalizer.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/__init__.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/base.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/dns.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/encoding.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/geo.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/list.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/network.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/security.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutators/string.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch_components/README.md +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch_components/__init__.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch_components/field_mapping.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch_components/lucene_converter.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch_components/query_converter.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/parser_components/README.md +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/parser_components/__init__.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/parser_components/ast_builder.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/parser_components/error_analyzer.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/parser_components/field_extractor.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/parser_components/grammar.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/stats_transformer.py +0 -0
- {tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/validators.py +0 -0
{tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tellaro-query-language
-Version: 0.2.1
+Version: 0.2.3
 Summary: A flexible, human-friendly query language for searching and filtering structured data
 Home-page: https://github.com/tellaro/tellaro-query-language
 License: MIT
@@ -24,6 +24,7 @@ Requires-Dist: opensearch-dsl (>=2.1.0,<3.0.0) ; extra == "opensearch"
 Requires-Dist: opensearch-py (>=2.4.2,<3.0.0) ; extra == "opensearch"
 Requires-Dist: pyparsing (>=3.2.1,<4.0.0)
 Requires-Dist: setuptools (>=80.0.0,<81.0.0)
+Requires-Dist: urllib3 (>=2.5.0,<3.0.0)
 Project-URL: Documentation, https://github.com/tellaro/tellaro-query-language/tree/main/docs
 Project-URL: Repository, https://github.com/tellaro/tellaro-query-language
 Description-Content-Type: text/markdown
{tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/pyproject.toml
RENAMED

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "tellaro-query-language"
-version = "0.2.1"
+version = "0.2.3"
 description = "A flexible, human-friendly query language for searching and filtering structured data"
 authors = ["Justin Henderson <justin@tellaro.io>"]
 license = "MIT"
@@ -39,6 +39,7 @@ dnspython = "^2.7.0"
 opensearch-py = {version = "^2.4.2", optional = true}
 opensearch-dsl = {version = "^2.1.0", optional = true}
 maxminddb = "^2.7.0"
+urllib3 = "^2.5.0"
 
 [tool.poetry.extras]
 opensearch = ["opensearch-py", "opensearch-dsl"]
@@ -94,3 +95,22 @@ line_length = 120
 filterwarnings = [
     "ignore:Unverified HTTPS request:urllib3.exceptions.InsecureRequestWarning",
 ]
+# Only collect tests from these directories
+testpaths = [
+    "tests",
+]
+# Don't collect from these directories
+norecursedirs = [
+    ".*",
+    "build",
+    "dist",
+    "*.egg",
+    "venv",
+    "env",
+    "__pycache__",
+    "playground",
+]
+# Custom markers
+markers = [
+    "integration: marks tests as integration tests (deselect with '-m \"not integration\"')",
+]
tellaro_query_language-0.2.3/src/tql/cache/base.py
ADDED

@@ -0,0 +1,97 @@
+"""Base cache infrastructure for TQL.
+
+This module provides the base CacheManager class that defines the caching
+interface used throughout TQL. Concrete implementations include LocalCacheManager
+for in-memory caching and RedisCacheManager for distributed caching.
+"""
+
+from typing import Any, Dict, Optional
+
+
+class CacheManager:
+    """Base class for cache management.
+
+    This class defines the interface for all cache implementations in TQL.
+    Subclasses should override these methods to provide actual caching functionality.
+
+    The base implementation provides no-op defaults that can be safely used when
+    caching is disabled or not needed.
+
+    Example:
+        >>> cache = LocalCacheManager()
+        >>> cache.set("user:123", {"name": "Alice", "age": 30}, ttl=3600)
+        >>> user = cache.get("user:123")
+        >>> cache.delete("user:123")
+    """
+
+    def get(self, key: str) -> Optional[Any]:
+        """Retrieve a value from the cache.
+
+        Args:
+            key: The cache key to look up. Should be a string identifier.
+
+        Returns:
+            The cached value if it exists and hasn't expired, None otherwise.
+
+        Example:
+            >>> value = cache.get("my_key")
+            >>> if value is not None:
+            ...     print(f"Found: {value}")
+        """
+        return None
+
+    def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
+        """Store a value in the cache.
+
+        Args:
+            key: The cache key under which to store the value.
+            value: The value to cache. Can be any Python object.
+            ttl: Time-to-live in seconds. If None or 0, the value never expires.
+
+        Example:
+            >>> cache.set("config", {"debug": True}, ttl=300)  # Cache for 5 minutes
+            >>> cache.set("permanent", {"version": "1.0"})  # Never expires
+        """
+
+    def delete(self, key: str) -> None:
+        """Remove a value from the cache.
+
+        Args:
+            key: The cache key to delete.
+
+        Example:
+            >>> cache.delete("expired_key")
+        """
+
+    def clear_pattern(self, pattern: str) -> int:  # pylint: disable=unused-argument
+        """Clear all keys matching a pattern.
+
+        Args:
+            pattern: A pattern string to match keys. Format depends on implementation.
+                For Redis: supports wildcards like "user:*" or "session:?123"
+                For Local: basic string matching
+
+        Returns:
+            The number of keys that were deleted.
+
+        Example:
+            >>> count = cache.clear_pattern("temp:*")
+            >>> print(f"Cleared {count} temporary keys")
+        """
+        return 0
+
+    def get_stats(self) -> Dict[str, Any]:
+        """Get cache statistics and metrics.
+
+        Returns:
+            Dictionary containing cache statistics such as:
+            - hit_rate: Cache hit rate percentage
+            - miss_rate: Cache miss rate percentage
+            - size: Number of items in cache
+            - memory_usage: Memory used by cache (if available)
+
+        Example:
+            >>> stats = cache.get_stats()
+            >>> print(f"Hit rate: {stats.get('hit_rate', 0)}%")
+        """
+        return {}
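For orientation, here is a minimal sketch of depending on the CacheManager interface added above. The class and method names come from the diff; the import path mirrors the src/tql/cache layout in the file list, and expensive_lookup is a hypothetical helper used only for illustration. The point is the behavior documented above: the base class is a safe no-op (get() returns None, set() does nothing), so the same code path works whether or not caching is enabled.

from typing import Any, Optional

from tql.cache.base import CacheManager


def expensive_lookup(key: str) -> dict:
    """Stand-in for a slow operation (DNS, GeoIP, mapping fetch, ...)."""
    return {"key": key, "resolved": True}


def cached_lookup(key: str, cache: Optional[CacheManager] = None) -> Any:
    # With no cache configured, the base CacheManager's no-op defaults apply:
    # get() always misses and set() silently discards, so nothing breaks.
    cache = cache or CacheManager()
    value = cache.get(key)
    if value is None:
        value = expensive_lookup(key)
        cache.set(key, value, ttl=300)
    return value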
tellaro_query_language-0.2.3/src/tql/cache/memory.py
ADDED

@@ -0,0 +1,171 @@
+"""In-memory cache implementation for TQL.
+
+This module provides a simple in-memory cache with TTL (time-to-live) support
+and basic LRU (Least Recently Used) eviction when the cache reaches its size limit.
+"""
+
+import time
+from typing import Any, Dict, Optional
+
+from .base import CacheManager
+
+
+class LocalCacheManager(CacheManager):
+    """Local in-memory cache with TTL and LRU eviction.
+
+    This implementation provides thread-safe in-memory caching suitable for
+    single-process applications. For distributed caching across multiple
+    processes or servers, use RedisCacheManager instead.
+
+    Features:
+        - TTL-based expiration
+        - LRU eviction when cache is full
+        - Hit/miss statistics tracking
+        - Pattern-based key clearing
+
+    Args:
+        max_size: Maximum number of items to store (default: 10000)
+        default_ttl: Default time-to-live in seconds (default: 3600 = 1 hour)
+
+    Example:
+        >>> cache = LocalCacheManager(max_size=1000, default_ttl=600)
+        >>> cache.set("user:123", {"name": "Alice"}, ttl=300)
+        >>> user = cache.get("user:123")
+        >>> stats = cache.get_stats()
+        >>> print(f"Hit rate: {stats['hit_rate']:.2%}")
+
+    Attributes:
+        max_size: Maximum cache size
+        default_ttl: Default TTL for cached items
+    """
+
+    def __init__(self, max_size: int = 10000, default_ttl: int = 3600):
+        """Initialize the local cache.
+
+        Args:
+            max_size: Maximum number of items to cache before eviction starts.
+            default_ttl: Default expiration time in seconds for cached items.
+        """
+        self.max_size = max_size
+        self.default_ttl = default_ttl
+        self._cache: Dict[str, Any] = {}
+        self._expiry: Dict[str, float] = {}
+        self._hits = 0
+        self._misses = 0
+
+    def get(self, key: str) -> Optional[Any]:
+        """Retrieve value from cache if not expired.
+
+        Args:
+            key: The cache key to retrieve.
+
+        Returns:
+            The cached value if present and not expired, None otherwise.
+
+        Note:
+            This method automatically removes expired keys when accessed.
+            Hit/miss statistics are updated on each call.
+        """
+        if key in self._cache:
+            expiry = self._expiry.get(key, float('inf'))
+            if expiry == 0 or expiry > time.time():
+                self._hits += 1
+                return self._cache[key]
+            else:
+                # Expired - clean up
+                del self._cache[key]
+                del self._expiry[key]
+        self._misses += 1
+        return None
+
+    def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
+        """Store value in cache with optional TTL.
+
+        Args:
+            key: The cache key under which to store the value.
+            value: The value to cache (any Python object).
+            ttl: Time-to-live in seconds. If None, uses default_ttl.
+                If 0, the item never expires.
+
+        Note:
+            When the cache is full (reaches max_size), the oldest item
+            is evicted to make room for the new one (LRU eviction).
+        """
+        if len(self._cache) >= self.max_size and key not in self._cache:
+            # Simple eviction: remove oldest (first in dict)
+            oldest_key = next(iter(self._cache))
+            del self._cache[oldest_key]
+            self._expiry.pop(oldest_key, None)
+
+        self._cache[key] = value
+        if ttl == 0:
+            # Never expires
+            self._expiry[key] = 0
+        else:
+            expiry_time = time.time() + (ttl if ttl is not None else self.default_ttl)
+            self._expiry[key] = expiry_time
+
+    def delete(self, key: str) -> None:
+        """Remove value from cache.
+
+        Args:
+            key: The cache key to delete.
+
+        Note:
+            If the key doesn't exist, this method does nothing (no error raised).
+        """
+        self._cache.pop(key, None)
+        self._expiry.pop(key, None)
+
+    def clear_pattern(self, pattern: str) -> int:
+        """Clear all keys matching a glob pattern.
+
+        Args:
+            pattern: A glob pattern to match keys. Supports wildcards:
+                - '*' matches any sequence of characters
+                - '?' matches any single character
+                - '[seq]' matches any character in seq
+                - '[!seq]' matches any character not in seq
+
+        Returns:
+            The number of keys that were deleted.
+
+        Example:
+            >>> cache.set("user:123", data1)
+            >>> cache.set("user:456", data2)
+            >>> cache.set("session:789", data3)
+            >>> count = cache.clear_pattern("user:*")  # Deletes user:123 and user:456
+            >>> print(count)  # 2
+        """
+        import fnmatch
+
+        keys_to_delete = [k for k in self._cache.keys() if fnmatch.fnmatch(k, pattern)]
+        for key in keys_to_delete:
+            del self._cache[key]
+            self._expiry.pop(key, None)
+        return len(keys_to_delete)
+
+    def get_stats(self) -> Dict[str, Any]:
+        """Get cache performance statistics.
+
+        Returns:
+            Dictionary containing:
+            - hits: Number of successful cache retrievals
+            - misses: Number of cache misses
+            - hit_rate: Ratio of hits to total requests (0.0 to 1.0)
+            - size: Current number of items in cache
+            - max_size: Maximum cache capacity
+
+        Example:
+            >>> stats = cache.get_stats()
+            >>> print(f"Cache is {stats['hit_rate']:.2%} effective")
+            >>> print(f"Using {stats['size']}/{stats['max_size']} slots")
+        """
+        total_requests = self._hits + self._misses
+        return {
+            "hits": self._hits,
+            "misses": self._misses,
+            "hit_rate": self._hits / total_requests if total_requests > 0 else 0.0,
+            "size": len(self._cache),
+            "max_size": self.max_size,
+        }
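A short usage sketch of the new LocalCacheManager, based on the behavior documented above (TTL expiration, ttl=0 meaning "never expires", glob-style clearing, and hit/miss statistics). The import path follows the src/tql/cache layout in the file list; the keys and values are illustrative.

import time

from tql.cache.memory import LocalCacheManager

cache = LocalCacheManager(max_size=1000, default_ttl=600)

cache.set("geoip:8.8.8.8", {"country": "US"}, ttl=1)   # expires after 1 second
cache.set("schema:version", "2024-01", ttl=0)           # ttl=0: never expires

assert cache.get("geoip:8.8.8.8") == {"country": "US"}  # hit
time.sleep(1.1)
assert cache.get("geoip:8.8.8.8") is None                # expired entry is cleaned up, counted as a miss
assert cache.get("schema:version") == "2024-01"          # still present

cache.clear_pattern("geoip:*")                           # fnmatch-based key clearing
print(cache.get_stats())                                 # hits, misses, hit_rate, size, max_size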
{tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core.py
RENAMED

@@ -100,8 +100,25 @@ class TQL:
                     # This is an OpenSearch-style mapping, map field to itself
                     self._simple_mappings[k] = k
                 else:
-                    #
-
+                    # Intelligent field mapping extraction for complex mappings
+                    # Priority: 1) Key matching field name, 2) Key without dots (primary field), 3) First key
+
+                    if k in v:
+                        # Field name exists as key in mapping (e.g., {"username": {"username": "keyword", ...}})
+                        self._simple_mappings[k] = k
+                    else:
+                        # Find primary field (keys without dots, not starting with underscore)
+                        primary_fields = [
+                            field_key for field_key in v.keys()
+                            if '.' not in field_key and not field_key.startswith('_')
+                        ]
+
+                        if primary_fields:
+                            # Use first primary field
+                            self._simple_mappings[k] = primary_fields[0]
+                        else:
+                            # Fallback to first key (maintain backward compatibility)
+                            self._simple_mappings[k] = next(iter(v.keys()))
             else:
                 # Default to mapping field to itself
                 self._simple_mappings[k] = k
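To illustrate the new selection priority in isolation, here is a small standalone sketch that mirrors the logic added above. Only the priority rules come from the diff; the function name and the mapping data are hypothetical.

from typing import Any, Dict


def pick_simple_mapping(field: str, mapping: Dict[str, Any]) -> str:
    """Mirror of the priority added in core.py: exact key match first,
    then the first dot-free, non-underscore key, then the first key."""
    if field in mapping:
        return field
    primary_fields = [
        key for key in mapping.keys()
        if '.' not in key and not key.startswith('_')
    ]
    if primary_fields:
        return primary_fields[0]
    return next(iter(mapping.keys()))


# Hypothetical enhanced mappings shaped like the {"field": {"variant": "type", ...}} case above.
print(pick_simple_mapping("username", {"username": "keyword", "username.text": "text"}))  # -> "username"
print(pick_simple_mapping("user", {"user.name": "text", "uname": "keyword"}))             # -> "uname"
print(pick_simple_mapping("src_ip", {"source.ip": "ip"}))                                 # -> "source.ip" (fallback)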
{tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/core_components/opensearch_operations.py
RENAMED

@@ -239,7 +239,7 @@ class OpenSearchOperations:
         analysis_result = self.analyze_opensearch_query(query)
         has_mutators = isinstance(analysis_result, MutatorAnalysisResult)
         needs_post_processing_for_stats = (
-            has_mutators and bool(analysis_result.post_processing_requirements) if has_mutators else False
+            has_mutators and bool(analysis_result.post_processing_requirements) if has_mutators else False  # type: ignore[union-attr]
         )
 
         # Handle stats queries differently
@@ -258,7 +258,7 @@ class OpenSearchOperations:
         if filter_ast:
             # Use the optimized AST if we have mutators
             if has_mutators and needs_post_processing_for_stats:
-                filter_query = backend.convert(analysis_result.optimized_ast.get("filter", filter_ast))["query"]
+                filter_query = backend.convert(analysis_result.optimized_ast.get("filter", filter_ast))["query"]  # type: ignore[union-attr]
             else:
                 filter_query = backend.convert(filter_ast)["query"]
         else:
@@ -529,6 +529,8 @@ class OpenSearchOperations:
             stats_evaluator = TQLStatsEvaluator()
 
             # Execute the stats aggregation in memory
+            if stats_ast_for_post_processing is None:
+                raise ValueError("Stats AST is None but phase2 processing was requested")
             stats_results = stats_evaluator.evaluate_stats(filtered_docs, stats_ast_for_post_processing, {})
 
             # Format response for stats-only (no documents)
@@ -547,7 +549,7 @@ class OpenSearchOperations:
                 "performance_impact": {
                     "overhead_ms": 0,  # Would need timing to calculate
                     "documents_processed": len(all_documents),
-                    "mutators_applied": len(analysis_result.post_processing_requirements) if has_mutators else 0,
+                    "mutators_applied": len(analysis_result.post_processing_requirements) if has_mutators else 0,  # type: ignore[union-attr]
                 },
                 "opensearch_query": complete_opensearch_query,
             }
@@ -580,6 +582,8 @@ class OpenSearchOperations:
             translator = OpenSearchStatsTranslator()
 
             # Transform the response using the translator
+            if stats_ast is None:
+                raise ValueError("Stats AST is None but grouping was detected")
             transformed_response = translator.transform_response(response, stats_ast)
 
             # The transformed response already has the correct structure
@@ -925,6 +929,21 @@ class OpenSearchOperations:
         # Get opensearch total before filtering
         opensearch_total = total_hits
 
+        # Track optimization features used in this query
+        optimizations_applied = []
+        if scan_all:
+            optimizations_applied.append("scroll_api")
+        if needs_phase2 and pagination_stats and pagination_stats.get("pages_checked", 0) > 1:
+            optimizations_applied.append("auto_pagination")
+        if request_cache:
+            optimizations_applied.append("request_cache")
+        if preference:
+            optimizations_applied.append("preference_routing")
+        if routing:
+            optimizations_applied.append("custom_routing")
+        if terminate_after:
+            optimizations_applied.append("early_termination")
+
         result = {
             "results": results,
             "total": len(results),
@@ -934,7 +953,7 @@ class OpenSearchOperations:
             "health_status": health_status,
             "health_reasons": health_reasons,
             "performance_impact": performance_impact,
-            "optimizations_applied":
+            "optimizations_applied": optimizations_applied,
             "opensearch_query": (
                 complete_opensearch_query if "complete_opensearch_query" in locals() else {}
             ),  # Include the full query body
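With this change the result metadata carries the actual list of optimizations that were active for the query. A small caller-side sketch of inspecting it; the dict shape follows the construction above, and the specific values shown are illustrative.

# Hypothetical result dict, shaped like the one assembled above.
result = {
    "results": [],
    "total": 0,
    "optimizations_applied": ["scroll_api", "request_cache"],
}

if "scroll_api" in result["optimizations_applied"]:
    print("Query was served via the scroll API (scan_all)")
print("Optimizations used:", ", ".join(result["optimizations_applied"]) or "none")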
{tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/evaluator.py
RENAMED

@@ -350,6 +350,8 @@ class TQLEvaluator:
             return left_missing or right_missing
         elif node_type == "unary_op":
             # Don't recurse through NOT operators - they handle missing fields themselves
+            # The NOT operator has special logic at lines 213-254 that handles missing fields correctly
+            # Recursing here would cause double-handling and incorrect results
             return False
         elif node_type == "collection_op":
             field_name = node["field"]
{tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/evaluator_components/value_comparison.py
RENAMED

@@ -6,6 +6,7 @@ operator implementations, and special cases like CIDR matching.
 
 import ipaddress
 import re
+from functools import lru_cache
 from typing import Any
 
 
@@ -15,6 +16,23 @@ class ValueComparator:
     # Sentinel value to distinguish missing fields from None values
     _MISSING_FIELD = object()
 
+    @staticmethod
+    @lru_cache(maxsize=256)
+    def _compile_regex(pattern: str) -> re.Pattern:
+        """Compile and cache regex patterns for performance.
+
+        Args:
+            pattern: Regex pattern string
+
+        Returns:
+            Compiled regex pattern
+
+        Note:
+            Uses LRU cache with max 256 patterns. This significantly improves
+            performance when the same regex patterns are used repeatedly in queries.
+        """
+        return re.compile(pattern)
+
     def compare_values(self, field_value: Any, operator: str, expected_value: Any) -> bool:  # noqa: C901
         """Compare a field value against an expected value using the given operator.
 
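The technique behind _compile_regex is plain functools.lru_cache over re.compile. A standalone sketch of the same idea follows; the function name, pattern, and sample lines are illustrative, only the caching approach comes from the diff.

import re
from functools import lru_cache


@lru_cache(maxsize=256)
def compile_regex(pattern: str) -> re.Pattern:
    # Compiling is the expensive step; identical patterns now compile only once.
    return re.compile(pattern)


hits = [compile_regex(r"\bmalware\b").search(line) for line in ["clean", "malware found", "ok"]]
print([bool(h) for h in hits])     # [False, True, False]
print(compile_regex.cache_info())  # CacheInfo(hits=2, misses=1, maxsize=256, currsize=1)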
@@ -49,9 +67,17 @@ class ValueComparator:
             return False
 
         # Handle None field values (field exists but is None)
+        # IMPORTANT: None is a valid value, distinct from missing fields.
+        # For 'exists' operator: This code path should NOT be reached because 'exists'
+        # checks field presence in the record, not the value. The evaluator handles
+        # 'exists' before calling compare_values. If we reach here with None, it means
+        # the field exists but has None value, which should NOT match 'exists'.
         if field_value is None:
             if operator in ["exists"]:
-
+                # Field key exists in record but value is None
+                # Semantics: 'exists' means "field has a non-null value"
+                # This matches database behavior where NULL != EXISTS
+                return False  # None value does not satisfy 'exists'
             elif operator in ["is"]:
                 # Check for null comparison - expected_value can be None or "null"
                 return expected_value is None or (isinstance(expected_value, str) and expected_value.lower() == "null")
@@ -59,6 +85,10 @@ class ValueComparator:
             return False
 
         # Convert numeric strings to numbers for comparison
+        # IMPORTANT: Store original values to check if conversion succeeded
+        field_value_original = field_value
+        expected_value_original = expected_value
+
         field_value = self._convert_numeric(field_value)
         expected_value = self._convert_numeric(expected_value)
 
@@ -68,6 +98,20 @@ class ValueComparator:
         if isinstance(field_value, str) and field_value.lower() in ["true", "false"]:
             field_value = field_value.lower() == "true"
 
+        # Type compatibility check for numeric operators
+        # If operator requires numeric comparison, both values must be numeric
+        # Exception: Arrays are handled specially in the operator logic below
+        if operator in ["gt", "gte", "lt", "lte", ">", ">=", "<", "<="]:
+            # Skip check if field_value is an array - handled by array logic below
+            if not isinstance(field_value, (list, tuple)):
+                field_is_numeric = isinstance(field_value, (int, float)) and not isinstance(field_value, bool)
+                expected_is_numeric = isinstance(expected_value, (int, float)) and not isinstance(expected_value, bool)
+
+                if not (field_is_numeric and expected_is_numeric):
+                    # At least one value failed numeric conversion
+                    # Cannot perform numeric comparison - return False
+                    return False
+
         try:
             if operator in ["eq", "="]:
                 # Handle array fields - check if ANY element equals expected value
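With the added guard, range operators simply return False when either side is not numeric after conversion, instead of attempting a mixed-type comparison. A small sketch of the effect; the import path follows the file list above, the values are illustrative, and the True results for the first two calls assume the usual greater-than branch that sits outside this hunk.

from tql.evaluator_components.value_comparison import ValueComparator

comparator = ValueComparator()

print(comparator.compare_values(8080, "gt", 1024))      # True  - both sides numeric
print(comparator.compare_values("8080", "gt", "1024"))  # True  - numeric strings are converted first
print(comparator.compare_values("n/a", "gt", 1024))     # False - non-numeric field value, guard returns False
print(comparator.compare_values(True, "gt", 0))         # False - booleans are deliberately excluded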
@@ -104,27 +148,30 @@ class ValueComparator:
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
                 # Handle list fields by checking if ANY element contains the expected value
+                # Case-insensitive comparison to match post-processor behavior
                 if isinstance(field_value, list):
                     # For arrays, check if ANY element contains the expected value
-                    return any(str(expected_value) in str(elem) for elem in field_value)
+                    return any(str(expected_value).lower() in str(elem).lower() for elem in field_value)
                 else:
-                    return str(expected_value) in str(field_value)
+                    return str(expected_value).lower() in str(field_value).lower()
             elif operator == "startswith":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
                 # Handle array fields - check if ANY element starts with expected value
+                # Case-insensitive comparison to match post-processor behavior
                 if isinstance(field_value, (list, tuple)):
-                    return any(str(elem).startswith(str(expected_value)) for elem in field_value)
-                return str(field_value).startswith(str(expected_value))
+                    return any(str(elem).lower().startswith(str(expected_value).lower()) for elem in field_value)
+                return str(field_value).lower().startswith(str(expected_value).lower())
             elif operator == "endswith":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
                 # Handle array fields - check if ANY element ends with expected value
+                # Case-insensitive comparison to match post-processor behavior
                 if isinstance(field_value, (list, tuple)):
-                    return any(str(elem).endswith(str(expected_value)) for elem in field_value)
-                return str(field_value).endswith(str(expected_value))
+                    return any(str(elem).lower().endswith(str(expected_value).lower()) for elem in field_value)
+                return str(field_value).lower().endswith(str(expected_value).lower())
             elif operator == "in":
                 if isinstance(expected_value, list):
                     if len(expected_value) == 1 and isinstance(field_value, list):
@@ -143,7 +190,13 @@ class ValueComparator:
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
-
+                # Use cached regex compilation for performance
+                try:
+                    pattern = self._compile_regex(str(expected_value))
+                    return bool(pattern.search(str(field_value)))
+                except (re.error, TypeError):
+                    # Invalid regex pattern, fall back to no match
+                    return False
             elif operator == "cidr":
                 # Unwrap single-element lists for CIDR
                 if isinstance(expected_value, list) and len(expected_value) == 1:
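The practical effect of the case-insensitivity change, sketched with the same compare_values entry point. The import path follows the file list above and the sample values are illustrative; in 0.2.1 these operators were case-sensitive, in 0.2.3 they match the post-processor's case-insensitive behavior.

from tql.evaluator_components.value_comparison import ValueComparator

comparator = ValueComparator()

print(comparator.compare_values("Mozilla/5.0 (Windows NT 10.0)", "contains", "windows"))  # True
print(comparator.compare_values(["PowerShell.EXE", "cmd.exe"], "endswith", ".exe"))       # True (any element)
print(comparator.compare_values("ADMIN-host", "startswith", "admin"))                     # True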
@@ -194,22 +247,31 @@ class ValueComparator:
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
-
+                # Case-insensitive comparison to match post-processor behavior
+                return str(expected_value).lower() not in str(field_value).lower()
             elif operator == "not_startswith":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
-
+                # Case-insensitive comparison to match post-processor behavior
+                return not str(field_value).lower().startswith(str(expected_value).lower())
             elif operator == "not_endswith":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
-
+                # Case-insensitive comparison to match post-processor behavior
+                return not str(field_value).lower().endswith(str(expected_value).lower())
             elif operator == "not_regexp":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
-
+                # Use cached regex compilation for performance
+                try:
+                    pattern = self._compile_regex(str(expected_value))
+                    return not bool(pattern.search(str(field_value)))
+                except (re.error, TypeError):
+                    # Invalid regex pattern, fall back to match (not regexp succeeds)
+                    return True
             elif operator == "not_cidr":
                 # Unwrap single-element lists for CIDR
                 if isinstance(expected_value, list) and len(expected_value) == 1:
{tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/mutator_analyzer.py
RENAMED

@@ -491,7 +491,7 @@ class MutatorAnalyzer:
             for param_name, param_value in geo_params.items():
                 mutator_params.append([param_name, param_value])
 
-            geo_mutator = {"name": "geoip_lookup"}
+            geo_mutator: Dict[str, Any] = {"name": "geoip_lookup"}
             if mutator_params:
                 geo_mutator["params"] = mutator_params
 
@@ -539,7 +539,7 @@ class MutatorAnalyzer:
             for param_name, param_value in nslookup_params.items():
                 mutator_params.append([param_name, param_value])
 
-            nslookup_mutator = {"name": "nslookup"}
+            nslookup_mutator: Dict[str, Any] = {"name": "nslookup"}
             if mutator_params:
                 nslookup_mutator["params"] = mutator_params
 
{tellaro_query_language-0.2.1 → tellaro_query_language-0.2.3}/src/tql/opensearch_mappings.py
RENAMED

@@ -52,7 +52,7 @@ def extract_field_mappings_from_opensearch(
     try:
         # Extract field names from the TQL query
         field_names = tql_instance.extract_fields(tql_query)
-        logger.
+        logger.debug(f"Extracted {len(field_names)} fields from TQL query: {field_names}")
 
         if not field_names:
             logger.warning("No fields found in TQL query")
@@ -68,7 +68,7 @@ def extract_field_mappings_from_opensearch(
         # Extract and convert mappings to TQL format
         tql_mappings = _convert_opensearch_mappings_to_tql_format(mapping_response, field_names)
 
-        logger.
+        logger.debug(f"Successfully converted mappings for {len(tql_mappings)} fields")
         return tql_mappings
 
     except Exception as e: