tellaro-query-language 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
- tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
- tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
- tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
- tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
- tql/__init__.py +47 -0
- tql/analyzer.py +385 -0
- tql/cache/__init__.py +7 -0
- tql/cache/base.py +25 -0
- tql/cache/memory.py +63 -0
- tql/cache/redis.py +68 -0
- tql/core.py +929 -0
- tql/core_components/README.md +92 -0
- tql/core_components/__init__.py +20 -0
- tql/core_components/file_operations.py +113 -0
- tql/core_components/opensearch_operations.py +869 -0
- tql/core_components/stats_operations.py +200 -0
- tql/core_components/validation_operations.py +599 -0
- tql/evaluator.py +379 -0
- tql/evaluator_components/README.md +131 -0
- tql/evaluator_components/__init__.py +17 -0
- tql/evaluator_components/field_access.py +176 -0
- tql/evaluator_components/special_expressions.py +296 -0
- tql/evaluator_components/value_comparison.py +315 -0
- tql/exceptions.py +160 -0
- tql/geoip_normalizer.py +233 -0
- tql/mutator_analyzer.py +830 -0
- tql/mutators/__init__.py +222 -0
- tql/mutators/base.py +78 -0
- tql/mutators/dns.py +316 -0
- tql/mutators/encoding.py +218 -0
- tql/mutators/geo.py +363 -0
- tql/mutators/list.py +212 -0
- tql/mutators/network.py +163 -0
- tql/mutators/security.py +225 -0
- tql/mutators/string.py +165 -0
- tql/opensearch.py +78 -0
- tql/opensearch_components/README.md +130 -0
- tql/opensearch_components/__init__.py +17 -0
- tql/opensearch_components/field_mapping.py +399 -0
- tql/opensearch_components/lucene_converter.py +305 -0
- tql/opensearch_components/query_converter.py +775 -0
- tql/opensearch_mappings.py +309 -0
- tql/opensearch_stats.py +451 -0
- tql/parser.py +1363 -0
- tql/parser_components/README.md +72 -0
- tql/parser_components/__init__.py +20 -0
- tql/parser_components/ast_builder.py +162 -0
- tql/parser_components/error_analyzer.py +101 -0
- tql/parser_components/field_extractor.py +112 -0
- tql/parser_components/grammar.py +473 -0
- tql/post_processor.py +737 -0
- tql/scripts.py +124 -0
- tql/stats_evaluator.py +444 -0
- tql/stats_transformer.py +184 -0
- tql/validators.py +110 -0
tql/mutators/encoding.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""Encoding and decoding mutators."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Optional
|
|
4
|
+
|
|
5
|
+
from .base import BaseMutator, append_to_result
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Base64EncodeMutator(BaseMutator):
    """
    Mutator that encodes values to base64.

    This is an enrichment mutator that can encode strings to base64
    format. It supports encoding individual strings or lists of strings.

    Parameters:
        field: Optional field to store the encoded value. When set, the
            encoded result is appended to the record under that field and
            the original value is returned unchanged (enrichment mode).
    """

    def __init__(self, params: Optional[Dict[str, Any]] = None) -> None:
        super().__init__(params)
        self.is_enrichment = True

    @staticmethod
    def _encode_item(item: Any) -> Optional[str]:
        """Encode a single value as a base64 ASCII string, or None on failure.

        Strings are UTF-8 encoded first; bytes are encoded directly; any
        other non-None value is stringified with str() before encoding.
        None stays None. Failures are swallowed and yield None (best-effort,
        matching the mutator's original behavior).
        """
        import base64

        if item is None:
            return None
        try:
            if isinstance(item, bytes):
                raw = item
            elif isinstance(item, str):
                raw = item.encode("utf-8")
            else:
                raw = str(item).encode("utf-8")
            return base64.b64encode(raw).decode("ascii")
        except Exception:
            # Best-effort: any encoding failure maps to None.
            return None

    def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:
        """Apply the base64 encode transformation.

        Args:
            field_name: Name of the field being processed (not used here).
            record: The full record; receives the encoded value when the
                'field' parameter is set.
            value: The value to encode (str, bytes, list, None, or other).

        Returns:
            The original value when 'field' is set (enrichment mode),
            otherwise the encoded value (or list of encoded values, with
            None entries where individual items failed to encode).
        """
        append_field = self.params.get("field")

        encoded_value: Any
        if value is None:
            encoded_value = None
        elif isinstance(value, list):
            # Encode each item independently; per-item failures become None.
            encoded_value = [self._encode_item(item) for item in value]
        else:
            encoded_value = self._encode_item(value)

        if append_field:
            # Enrichment mode: store the encoding and keep the original value.
            append_to_result(record, append_field, encoded_value)
            return value
        else:
            # Return the encoded value directly
            return encoded_value
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class Base64DecodeMutator(BaseMutator):
    """
    Mutator that decodes base64 values.

    This is an enrichment mutator that can decode base64-encoded strings
    back to their original form. It supports decoding individual strings
    or lists of strings. Missing '=' padding is tolerated and added
    automatically before decoding.

    Parameters:
        field: Optional field to store the decoded value. When set, the
            decoded result is appended to the record under that field and
            the original value is returned unchanged (enrichment mode).
    """

    def __init__(self, params: Optional[Dict[str, Any]] = None) -> None:
        super().__init__(params)
        self.is_enrichment = True

    @staticmethod
    def _pad(text: str) -> str:
        """Return *text* with '=' padding added up to a multiple of 4.

        Returns a new string; the input is never mutated.
        """
        remainder = len(text) % 4
        return text + "=" * (4 - remainder) if remainder else text

    @classmethod
    def _decode_item(cls, text: str) -> Optional[str]:
        """Decode one base64 string to UTF-8 text, or None on any failure."""
        import base64

        try:
            return base64.b64decode(cls._pad(text)).decode("utf-8")
        except Exception:
            # Invalid base64 or non-UTF-8 payload: best-effort None.
            return None

    def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:
        """Apply the base64 decode transformation.

        Args:
            field_name: Name of the field being processed (not used here).
            record: The full record; receives the decoded value when the
                'field' parameter is set.
            value: The value to decode (str, list, None, or other).

        Returns:
            The original value when 'field' is set (enrichment mode),
            otherwise the decoded value. In lists, non-string items pass
            through unchanged and failed decodes become None.

        Bug fix: the previous implementation padded the input in place
        (``value += "=" * padding``) before returning it in enrichment
        mode, so callers received the padded string instead of the
        original value. Padding is now done on a copy via _pad().
        """
        append_field = self.params.get("field")

        decoded_value: Any
        if value is None:
            decoded_value = None
        elif isinstance(value, str):
            decoded_value = self._decode_item(value)
        elif isinstance(value, list):
            # Decode string items; pass non-string items through unchanged.
            decoded_value = [self._decode_item(item) if isinstance(item, str) else item for item in value]
        else:
            # For other types, try to decode their string representation.
            decoded_value = self._decode_item(str(value))

        if append_field:
            # Enrichment mode: store the decoding and keep the original value.
            append_to_result(record, append_field, decoded_value)
            return value
        else:
            # Return the decoded value directly
            return decoded_value
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class URLDecodeMutator(BaseMutator):
    """
    Mutator that decodes URL-encoded values.

    This is an enrichment mutator that decodes percent-encoded strings
    (e.g., %20 -> space, %2F -> /). Individual strings and lists of
    strings are supported; non-string list items pass through untouched.

    Parameters:
        field: Optional field to store the decoded value. When set, the
            decoded result is appended to the record under that field and
            the original value is returned unchanged (enrichment mode).
    """

    def __init__(self, params: Optional[Dict[str, Any]] = None) -> None:
        super().__init__(params)
        self.is_enrichment = True

    def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:
        """Apply the URL decode transformation.

        Args:
            field_name: Name of the field being processed (not used here).
            record: The full record; receives the decoded value when the
                'field' parameter is set.
            value: The value to decode (str, list, None, or other).

        Returns:
            The original value when 'field' is set (enrichment mode),
            otherwise the decoded value.
        """
        from urllib.parse import unquote

        def safe_unquote(text: str) -> str:
            # unquote rarely fails, but fall back to the input on any error.
            try:
                return unquote(text)
            except Exception:
                return text

        target_field = self.params.get("field")

        result: Any
        if value is None:
            result = None
        elif isinstance(value, str):
            result = safe_unquote(value)
        elif isinstance(value, list):
            # Decode string items; leave everything else as-is.
            result = [safe_unquote(entry) if isinstance(entry, str) else entry for entry in value]
        else:
            # Fall back to decoding the string representation.
            result = safe_unquote(str(value))

        if target_field:
            # Enrichment mode: store the decoding and keep the original value.
            append_to_result(record, target_field, result)
            return value
        return result
|
tql/mutators/geo.py
ADDED
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
"""GeoIP lookup mutator."""
|
|
2
|
+
|
|
3
|
+
import ipaddress
|
|
4
|
+
import os
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
import maxminddb
|
|
9
|
+
else:
|
|
10
|
+
try:
|
|
11
|
+
import maxminddb
|
|
12
|
+
except ImportError:
|
|
13
|
+
maxminddb = None
|
|
14
|
+
|
|
15
|
+
from ..cache import CacheManager, LocalCacheManager, RedisCacheManager
|
|
16
|
+
from ..exceptions import TQLConfigError
|
|
17
|
+
from ..geoip_normalizer import GeoIPNormalizer
|
|
18
|
+
from .base import BaseMutator, PerformanceClass
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class GeoIPResolver:
    """Handles GeoIP MMDB file detection and loading.

    Detects which vendor database (DB-IP or MaxMind, paid or "lite") is
    available — either from an explicit ``db_path`` or by probing a base
    directory for known filenames — and opens the matching maxminddb
    reader(s).

    Attributes:
        config: Configuration dict (may contain ``db_path``, ``base_path``,
            and per-database overrides like ``city_db``/``asn_db``).
        db_type: Vendor name, ``"dbip"`` or ``"maxmind"``; set as a side
            effect of _load_mmdb_files.
        mmdb_type: Full type identifier (e.g. ``"dbip_pro"``,
            ``"maxmind_lite"``); also set by _load_mmdb_files.
        mmdb_readers: Mapping of reader role (``"full"``, ``"city"``,
            ``"country"``, ``"asn"``) to an open maxminddb reader.
    """

    def __init__(self, config: Optional[Dict[str, str]] = None):
        """Open the MMDB reader(s) described by *config*.

        Raises:
            ImportError: If the optional maxminddb dependency is missing.
            TQLConfigError: If no database files can be found (raised by
                _load_mmdb_files).
        """
        if maxminddb is None:
            raise ImportError("maxminddb package is required for GeoIP lookups")

        self.config = config or {}
        self.db_type: Optional[str] = None  # Will be set by _load_mmdb_files
        self.mmdb_type: Optional[str] = None  # Full type identifier (e.g., 'dbip_pro', 'maxmind_lite')
        self.mmdb_readers = self._load_mmdb_files()

    def _load_mmdb_files(self) -> Dict[str, Any]:  # noqa: C901
        """Load MMDB files with smart detection.

        Resolution order:
        1. Explicit ``config["db_path"]`` (vendor/edition inferred from the
           filename: "dbip"/"db-ip" vs. MaxMind, "lite" vs. paid).
        2. Auto-detection under ``config["base_path"]`` (or the
           TQL_GEOIP_MMDB_PATH env var, default /usr/share/geoip), trying
           known filename patterns in priority order: paid single-file
           databases first, then multi-file free editions.

        Side effects: sets self.db_type and self.mmdb_type for whichever
        pattern matched.

        Returns:
            Dict mapping reader role to an open maxminddb reader — either
            {"full": reader} or a multi-file mapping like
            {"city": ..., "asn": ...}.

        Raises:
            TQLConfigError: If no recognizable database files are found.
        """
        # Check for explicit full DB path first
        if self.config.get("db_path") and os.path.exists(self.config["db_path"]):
            # Detect DB type from filename
            db_path = self.config["db_path"]
            db_lower = db_path.lower()
            if "dbip" in db_lower or "db-ip" in db_lower:
                self.db_type = "dbip"
                if "lite" in db_lower:
                    self.mmdb_type = "dbip_lite"
                else:
                    self.mmdb_type = "dbip_pro"
            else:
                # Anything not recognizably DB-IP is treated as MaxMind.
                self.db_type = "maxmind"
                if "lite" in db_lower:
                    self.mmdb_type = "maxmind_lite"
                else:
                    self.mmdb_type = "maxmind_pro"
            return {"full": maxminddb.open_database(db_path)}

        # Check base path for auto-detection
        base_path = self.config.get("base_path", os.environ.get("TQL_GEOIP_MMDB_PATH", "/usr/share/geoip"))

        # Priority order for database detection
        db_patterns: List[Dict[str, Any]] = [
            # Single file databases (contains all data)
            {"type": "full", "files": ["dbip-full.mmdb"], "vendor": "dbip", "mmdb_type": "dbip_pro"},  # DB-IP paid
            {
                "type": "full",
                "files": ["GeoIP2-City.mmdb"],
                "vendor": "maxmind",
                "mmdb_type": "maxmind_pro",
            },  # MaxMind paid (City includes Country)
            # Multi-file databases (need all files for complete data)
            {
                "type": "multi",
                "files": {"city": "GeoIP2-City.mmdb", "asn": "GeoIP2-ASN.mmdb"},
                "vendor": "maxmind",
                "mmdb_type": "maxmind_pro",
            },  # MaxMind paid (separate)
            {
                "type": "multi",
                "files": {
                    "city": "dbip-city-lite.mmdb",
                    "country": "dbip-country-lite.mmdb",
                    "asn": "dbip-asn-lite.mmdb",
                },
                "vendor": "dbip",
                "mmdb_type": "dbip_lite",
            },  # DB-IP free
            {
                "type": "multi",
                "files": {"city": "GeoLite2-City.mmdb", "asn": "GeoLite2-ASN.mmdb"},
                "vendor": "maxmind",
                "mmdb_type": "maxmind_lite",
            },  # MaxMind free
        ]

        # Try each pattern in priority order
        for pattern in db_patterns:
            if pattern["type"] == "full":
                # Single file contains all data
                for filename in pattern["files"]:
                    path = os.path.join(base_path, filename)
                    if os.path.exists(path):
                        self.db_type = pattern["vendor"]
                        self.mmdb_type = pattern["mmdb_type"]
                        return {"full": maxminddb.open_database(path)}
            else:
                # Multiple files needed: the pattern only matches when every
                # file is present (all-or-nothing, so data stays complete).
                readers = {}
                all_found = True
                for db_type, filename in pattern["files"].items():
                    # Per-role override (e.g. config["city_db"]) wins over
                    # the conventional path under base_path.
                    path = self.config.get(f"{db_type}_db") or os.path.join(base_path, filename)
                    if os.path.exists(path):
                        readers[db_type] = maxminddb.open_database(path)
                    else:
                        all_found = False
                        break

                if all_found and readers:
                    self.db_type = pattern["vendor"]
                    self.mmdb_type = pattern["mmdb_type"]
                    return readers
                # NOTE(review): readers opened before a miss are abandoned
                # without close() here; presumably freed by GC — confirm.

        raise TQLConfigError(
            f"No GeoIP MMDB files found in {base_path}. " f"Supported: DB-IP (paid/free) or MaxMind (GeoIP2/GeoLite2)"
        )

    def lookup(self, ip: str) -> Optional[Dict[str, Any]]:
        """Lookup IP and return raw result.

        For multi-file databases the per-file results are merged into one
        dict (city, then country, then asn — later updates overwrite
        overlapping keys). Any exception yields None (best-effort lookup).

        Args:
            ip: IP address string to look up.

        Returns:
            Raw (vendor-specific) lookup dict, or None when the IP is not
            found or the lookup fails.
        """
        try:
            if "full" in self.mmdb_readers:
                raw_data = self.mmdb_readers["full"].get(ip)
                return raw_data
            else:
                # Combine data from multiple files
                result = {}
                if "city" in self.mmdb_readers:
                    city_data = self.mmdb_readers["city"].get(ip)
                    if city_data:
                        result.update(city_data)
                if "country" in self.mmdb_readers:
                    country_data = self.mmdb_readers["country"].get(ip)
                    if country_data:
                        result.update(country_data)
                if "asn" in self.mmdb_readers:
                    asn_data = self.mmdb_readers["asn"].get(ip)
                    if asn_data:
                        result.update(asn_data)
                return result if result else None
        except Exception:
            # Best-effort: treat any reader error as "no data".
            return None

    def close(self):
        """Close all MMDB readers.

        Safe to call on readers without a close() method (skipped).
        """
        for reader in self.mmdb_readers.values():
            if hasattr(reader, "close"):
                reader.close()
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class GeoIPLookupMutator(BaseMutator):
    """
    Mutator that performs GeoIP lookups on IP addresses using MMDB files.
    Returns normalized data following ECS (Elastic Common Schema) conventions.

    Performance Characteristics:
        - In-memory: MODERATE - Local database lookups with caching
        - OpenSearch: SLOW - Post-processing overhead plus database lookups

    Parameters:
        db_path: Path to GeoIP database file
        cache: Enable caching (default: True)
        cache_ttl: Cache TTL in seconds (default: 86400)
        force: Force new lookup even if data exists (default: False)
        save: Save enrichment to record (default: True)
        field: Field name to store results

    Example:
        ip_address | geoip_lookup(cache=true) contains 'US'
    """

    # Class-level cache and resolver (shared singletons across all
    # instances of this mutator within the process).
    _cache_manager: Optional[CacheManager] = None
    _geo_resolver: Optional[GeoIPResolver] = None

    def __init__(self, params: Optional[Dict[str, Any]] = None) -> None:
        """Initialize the mutator and declare its performance profile."""
        super().__init__(params)
        self.is_enrichment = True
        # GeoIP uses local database, so it's moderate in memory
        self.performance_in_memory = PerformanceClass.MODERATE
        # Slower in OpenSearch due to post-processing overhead
        self.performance_opensearch = PerformanceClass.SLOW

    @classmethod
    def initialize_cache(cls, cache_backend: Optional[str] = None):
        """Initialize the cache manager based on configuration.

        Args:
            cache_backend: "redis" for a Redis-backed cache (configured via
                TQL_REDIS_* env vars, falling back to a local cache if the
                connection fails), "none" to leave caching uninitialized,
                anything else (incl. None) for the default local LRU cache.
        """
        if cache_backend == "redis":
            # Initialize Redis cache
            redis_host = os.environ.get("TQL_REDIS_HOST", "localhost")
            redis_port = int(os.environ.get("TQL_REDIS_PORT", 6379))
            redis_password = os.environ.get("TQL_REDIS_PASSWORD")
            redis_db = int(os.environ.get("TQL_REDIS_DB", 0))

            try:
                import redis  # pylint: disable=import-error

                redis_client = redis.Redis(
                    host=redis_host, port=redis_port, password=redis_password, db=redis_db, decode_responses=True
                )
                # Test connection eagerly so a bad Redis config degrades to
                # the local cache now instead of failing on first lookup.
                redis_client.ping()
                cls._cache_manager = RedisCacheManager(redis_client)
            except Exception:
                # Fall back to local cache on Redis connection error
                cls._cache_manager = LocalCacheManager()
        elif cache_backend != "none":
            # Default to local cache
            max_size = int(os.environ.get("TQL_CACHE_LOCAL_MAX_SIZE", 10000))
            default_ttl = int(os.environ.get("TQL_CACHE_LOCAL_TTL", 3600))
            cls._cache_manager = LocalCacheManager(max_size=max_size, default_ttl=default_ttl)

    @classmethod
    def get_cache_manager(cls) -> Optional[CacheManager]:
        """Get or create the cache manager.

        Lazily initializes the class-level cache from the
        TQL_CACHE_BACKEND env var (default "local"). May return None when
        the backend is configured as "none".
        """
        if cls._cache_manager is None:
            cache_backend = os.environ.get("TQL_CACHE_BACKEND", "local")
            cls.initialize_cache(cache_backend)
        return cls._cache_manager

    @classmethod
    def get_geo_resolver(cls, config: Optional[Dict[str, str]] = None) -> GeoIPResolver:
        """Get or create the GeoIP resolver.

        NOTE(review): the resolver is created once per process — *config*
        is only honored on the first call; later calls reuse the existing
        resolver regardless of config. Confirm this is intended.
        """
        if cls._geo_resolver is None:
            cls._geo_resolver = GeoIPResolver(config)
        return cls._geo_resolver

    def _get_field_value(self, record: Dict[str, Any], field_path: str) -> Any:
        """Get a field value from a record, supporting nested fields.

        Args:
            record: Record to read from.
            field_path: Dotted path, e.g. "destination.ip".

        Returns:
            The value at the path, or None when any segment is missing.
        """
        parts = field_path.split(".")
        current = record

        for part in parts:
            if isinstance(current, dict) and part in current:
                current = current[part]
            else:
                return None

        return current

    def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:  # noqa: C901
        """
        Apply GeoIP lookup to an IP address.

        Args:
            field_name: The name of the field being processed.
            record: The full record (not modified for this mutator).
            value: The IP address to lookup.

        Returns:
            Normalized GeoIP data dictionary or None if lookup fails.
        """
        # Check if maxminddb is available
        if maxminddb is None:
            raise ImportError("maxminddb package is required for GeoIP lookups")

        # Validate input: only string IPs are looked up.
        if not isinstance(value, str):
            return None

        # Validate IP address (both IPv4 and IPv6 accepted by ip_address)
        try:
            ipaddress.ip_address(value)
        except ValueError:
            return None

        # Check if we should force lookup
        force_lookup = self.params.get("force", False)

        # Keep track of existing geo data for potential restoration
        existing_geo_data = None
        existing_as_data = None

        # Check if geo data already exists in the record
        if "." in field_name:
            # For nested fields like destination.ip, check destination.geo and destination.as
            parent_path = field_name.rsplit(".", 1)[0]
            parent = self._get_field_value(record, parent_path)
            if isinstance(parent, dict):
                existing_geo_data = parent.get("geo")
                existing_as_data = parent.get("as")
        else:
            # For top-level fields, check enrichment.geo and enrichment.as
            if "enrichment" in record and isinstance(record["enrichment"], dict):
                existing_geo_data = record["enrichment"].get("geo")
                existing_as_data = record["enrichment"].get("as")

        # If not forcing and geo data exists with at least country_iso_code, return existing
        # (country_iso_code is used as the marker of a "complete enough" entry)
        if (
            not force_lookup
            and existing_geo_data
            and isinstance(existing_geo_data, dict)
            and "country_iso_code" in existing_geo_data
        ):
            result = {}
            if existing_geo_data:
                result["geo"] = existing_geo_data
            if existing_as_data:
                result["as"] = existing_as_data
            return result if result else None

        # Check if caching is disabled
        use_cache = self.params.get("cache", True)
        cache_ttl = self.params.get("cache_ttl", 86400)  # 24 hours default

        # Get cache manager (None when caching is disabled)
        cache_manager = self.get_cache_manager() if use_cache else None

        # Try cache first
        # NOTE: cache_key is only bound inside this branch; the later
        # cache-write is guarded by the same cache_manager truthiness, so
        # it never reads cache_key unbound.
        if cache_manager:
            cache_key = f"geo:{value}"
            cached_result = cache_manager.get(cache_key)
            if cached_result is not None:
                return cached_result

        # Get GeoIP resolver and perform lookup
        try:
            geo_config = {}
            if self.params.get("db_path"):
                geo_config["db_path"] = self.params["db_path"]
            elif os.getenv("TQL_GEOIP_FULL_PATH"):
                # Use environment variable for direct database path
                geo_config["db_path"] = os.getenv("TQL_GEOIP_FULL_PATH")
            geo_resolver = self.get_geo_resolver(geo_config)

            # Perform lookup
            raw_data = geo_resolver.lookup(value)

            # Normalize the raw vendor data into ECS-style fields
            if raw_data and geo_resolver.db_type:
                normalized = GeoIPNormalizer.normalize(raw_data, geo_resolver.db_type, geo_resolver.mmdb_type)
            else:
                normalized = None
        except Exception:
            # Geo lookup failed (e.g., no database configured)
            normalized = None

        # Cache the result (including None, to avoid repeated failed lookups)
        if cache_manager and use_cache:
            cache_manager.set(cache_key, normalized, ttl=cache_ttl)

        # Handle force parameter logic
        if force_lookup:
            # When force=true, we always try a fresh lookup
            # If lookup succeeded, return the new data
            # If lookup failed, return None (don't fall back to existing data)
            return normalized
        else:
            # When force=false, prefer existing data if available
            if normalized is None and (existing_geo_data or existing_as_data):
                # Lookup failed but we have existing data - return it
                result = {}
                if existing_geo_data:
                    result["geo"] = existing_geo_data
                if existing_as_data:
                    result["as"] = existing_as_data
                return result
            else:
                # Either lookup succeeded or no existing data
                return normalized
|