tellaro-query-language 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
- tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
- tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
- tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
- tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
- tql/__init__.py +47 -0
- tql/analyzer.py +385 -0
- tql/cache/__init__.py +7 -0
- tql/cache/base.py +25 -0
- tql/cache/memory.py +63 -0
- tql/cache/redis.py +68 -0
- tql/core.py +929 -0
- tql/core_components/README.md +92 -0
- tql/core_components/__init__.py +20 -0
- tql/core_components/file_operations.py +113 -0
- tql/core_components/opensearch_operations.py +869 -0
- tql/core_components/stats_operations.py +200 -0
- tql/core_components/validation_operations.py +599 -0
- tql/evaluator.py +379 -0
- tql/evaluator_components/README.md +131 -0
- tql/evaluator_components/__init__.py +17 -0
- tql/evaluator_components/field_access.py +176 -0
- tql/evaluator_components/special_expressions.py +296 -0
- tql/evaluator_components/value_comparison.py +315 -0
- tql/exceptions.py +160 -0
- tql/geoip_normalizer.py +233 -0
- tql/mutator_analyzer.py +830 -0
- tql/mutators/__init__.py +222 -0
- tql/mutators/base.py +78 -0
- tql/mutators/dns.py +316 -0
- tql/mutators/encoding.py +218 -0
- tql/mutators/geo.py +363 -0
- tql/mutators/list.py +212 -0
- tql/mutators/network.py +163 -0
- tql/mutators/security.py +225 -0
- tql/mutators/string.py +165 -0
- tql/opensearch.py +78 -0
- tql/opensearch_components/README.md +130 -0
- tql/opensearch_components/__init__.py +17 -0
- tql/opensearch_components/field_mapping.py +399 -0
- tql/opensearch_components/lucene_converter.py +305 -0
- tql/opensearch_components/query_converter.py +775 -0
- tql/opensearch_mappings.py +309 -0
- tql/opensearch_stats.py +451 -0
- tql/parser.py +1363 -0
- tql/parser_components/README.md +72 -0
- tql/parser_components/__init__.py +20 -0
- tql/parser_components/ast_builder.py +162 -0
- tql/parser_components/error_analyzer.py +101 -0
- tql/parser_components/field_extractor.py +112 -0
- tql/parser_components/grammar.py +473 -0
- tql/post_processor.py +737 -0
- tql/scripts.py +124 -0
- tql/stats_evaluator.py +444 -0
- tql/stats_transformer.py +184 -0
- tql/validators.py +110 -0
tql/mutators/list.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""List evaluation mutators for aggregate operations."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
|
|
5
|
+
from .base import BaseMutator
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AnyMutator(BaseMutator):
    """
    Reduce a field value to one boolean using "any" semantics.

    - list: True when at least one element is truthy (an empty list gives False).
    - None: always False.
    - any other value: the truthiness of the value itself.
    """

    def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:  # noqa: C901
        """
        Evaluate ``value`` with "any" semantics.

        Args:
            field_name: Name of the field being mutated (not used by this mutator).
            record: Record the value was read from (not used by this mutator).
            value: The field value to evaluate.

        Returns:
            A bool following the per-type rules in the class docstring.
        """
        if isinstance(value, list):
            return any(value)
        # None is spelled out so the "missing value is False" rule is explicit.
        return False if value is None else bool(value)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class AllMutator(BaseMutator):
    """
    Reduce a field value to one boolean using "all" semantics.

    - list: True when every element is truthy; an empty list gives True,
      mirroring Python's built-in ``all()``.
    - None: always False.
    - any other value: the truthiness of the value itself.
    """

    def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:  # noqa: C901
        """
        Evaluate ``value`` with "all" semantics.

        Args:
            field_name: Name of the field being mutated (not used by this mutator).
            record: Record the value was read from (not used by this mutator).
            value: The field value to evaluate.

        Returns:
            A bool following the per-type rules in the class docstring.
        """
        # A missing value is False, unlike an empty list which is True.
        if value is None:
            return False
        return all(value) if isinstance(value, list) else bool(value)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class AvgMutator(BaseMutator):
    """
    Compute the arithmetic mean of the numeric content of a field.

    - list: mean of the elements that are numbers or numeric strings; other
      elements (including booleans and None) are ignored. If nothing numeric
      remains the result is None.
    - single int/float: returned unchanged.
    - single string: converted with ``float()``; None if it does not parse.
    - anything else (None, bool, dict, ...): None.
    """

    def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:  # noqa: C901
        """
        Apply the average transformation.

        Args:
            field_name: Name of the field being mutated (not used by this mutator).
            record: Record the value was read from (not used by this mutator).
            value: The field value to average.

        Returns:
            The mean as a number, or None when no numeric value is available.
        """

        def as_number(candidate: Any) -> Any:
            """Return ``candidate`` as a number, or None when it is not numeric."""
            # bool is a subclass of int, so it has to be rejected explicitly.
            if isinstance(candidate, bool):
                return None
            if isinstance(candidate, (int, float)):
                return candidate
            if isinstance(candidate, str):
                try:
                    return float(candidate)
                except ValueError:
                    return None
            return None

        if not isinstance(value, list):
            return as_number(value)

        numbers = [number for number in map(as_number, value) if number is not None]
        if not numbers:
            return None
        return sum(numbers) / len(numbers)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# Alias for AvgMutator
|
|
91
|
+
class AverageMutator(AvgMutator):
    """Second name for AvgMutator; inherits its behavior without changes."""
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class SumMutator(BaseMutator):
    """
    Add up the numeric content of a field.

    - list: sum of the elements that are numbers or numeric strings; other
      elements (including booleans and None) are ignored. An empty list or a
      list without numeric content gives 0.
    - single int/float: returned unchanged.
    - single string: converted with ``float()``; 0 if it does not parse.
    - anything else (None, bool, dict, ...): 0.
    """

    def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:  # noqa: C901
        """
        Apply the sum transformation.

        Args:
            field_name: Name of the field being mutated (not used by this mutator).
            record: Record the value was read from (not used by this mutator).
            value: The field value to sum.

        Returns:
            The total as a number; 0 when there is nothing numeric to add.
        """
        if isinstance(value, list):
            addends = []
            for element in value:
                # bool is a subclass of int, so it is skipped before the numeric test.
                if isinstance(element, bool):
                    continue
                if isinstance(element, (int, float)):
                    addends.append(element)
                elif isinstance(element, str):
                    try:
                        addends.append(float(element))
                    except ValueError:
                        continue
            return sum(addends) if addends else 0

        if value is None or isinstance(value, bool):
            return 0
        if isinstance(value, (int, float)):
            return value
        if isinstance(value, str):
            try:
                return float(value)
            except ValueError:
                return 0
        return 0
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class MaxMutator(BaseMutator):
    """
    Pick the largest value of a field.

    - list: None entries are dropped, then ``max()`` is applied to what is
      left. If the remaining entries cannot be compared with each other, only
      numbers and numeric strings (converted with ``float()``) are compared.
      An empty list, or one with nothing usable, gives None.
    - any non-list value (including None): returned unchanged.
    """

    def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:  # noqa: C901
        """
        Apply the max transformation.

        Args:
            field_name: Name of the field being mutated (not used by this mutator).
            record: Record the value was read from (not used by this mutator).
            value: The field value to reduce.

        Returns:
            The maximum element, the value itself for non-lists, or None.
        """
        if not isinstance(value, list):
            # Scalars, and None itself, pass straight through.
            return value

        candidates = [entry for entry in value if entry is not None]
        if not candidates:
            return None

        try:
            return max(candidates)
        except (TypeError, ValueError):
            # Mixed or unorderable types: fall through to a numeric-only comparison.
            pass

        numbers = []
        for entry in candidates:
            if isinstance(entry, bool):
                continue
            if isinstance(entry, (int, float)):
                numbers.append(entry)
            elif isinstance(entry, str):
                try:
                    numbers.append(float(entry))
                except ValueError:
                    continue

        if not numbers:
            return None
        return max(numbers)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class MinMutator(BaseMutator):
    """
    Pick the smallest value of a field.

    - list: None entries are dropped, then ``min()`` is applied to what is
      left. If the remaining entries cannot be compared with each other, only
      numbers and numeric strings (converted with ``float()``) are compared.
      An empty list, or one with nothing usable, gives None.
    - any non-list value (including None): returned unchanged.
    """

    def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:  # noqa: C901
        """
        Apply the min transformation.

        Args:
            field_name: Name of the field being mutated (not used by this mutator).
            record: Record the value was read from (not used by this mutator).
            value: The field value to reduce.

        Returns:
            The minimum element, the value itself for non-lists, or None.
        """

        def numeric_view(items: Any) -> Any:
            """Yield the numeric interpretation of each item that has one."""
            for element in items:
                # bool is a subclass of int; it is not treated as a number here.
                if isinstance(element, bool):
                    continue
                if isinstance(element, (int, float)):
                    yield element
                elif isinstance(element, str):
                    try:
                        yield float(element)
                    except ValueError:
                        continue

        if not isinstance(value, list):
            # Scalars, and None itself, pass straight through.
            return value

        present = [element for element in value if element is not None]
        if not present:
            return None

        try:
            return min(present)
        except (TypeError, ValueError):
            # Mixed or unorderable types: compare only what can be read as a number.
            return min(numeric_view(present), default=None)
|
tql/mutators/network.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""Network-related mutators for IP address operations."""
|
|
2
|
+
|
|
3
|
+
import ipaddress
|
|
4
|
+
from typing import Any, Dict, Optional
|
|
5
|
+
|
|
6
|
+
from .base import BaseMutator, PerformanceClass
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class IsPrivateMutator(BaseMutator):
    """
    Mutator that reports whether an IP address is private or otherwise special-use.

    Performance Characteristics:
    - In-memory: FAST - Simple IP range calculations
    - OpenSearch: MODERATE - Requires post-processing of all results

    An address counts as "private" when the standard-library ``ipaddress``
    module flags it as any of:
    - private (this includes the RFC 1918 ranges 10.0.0.0/8, 172.16.0.0/12 and
      192.168.0.0/16, and fc00::/7 for IPv6)
    - loopback (127.0.0.0/8 or ::1)
    - link-local (169.254.0.0/16 or fe80::/10)
    - reserved

    Used as a filter in queries and returns True/False for filtering purposes.

    Example:
        source_ip | is_private()
    """

    def __init__(self, params: Optional[Dict[str, Any]] = None) -> None:
        """Initialise the mutator and record its performance classes."""
        super().__init__(params)
        self.performance_opensearch = PerformanceClass.MODERATE
        self.performance_in_memory = PerformanceClass.FAST

    def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:
        """
        Check whether the value is a private IP address.

        Args:
            field_name: Name of the field being mutated (not used by this mutator).
            record: Record the value was read from (not used by this mutator).
            value: A single address or a list of addresses; each is passed
                through ``str()`` before parsing.

        Returns:
            False for None. For a list, True if any non-None entry is private.
            Otherwise the result for the single value.
        """
        if value is None:
            return False

        # Treat a scalar as a one-element list so both shapes share one code path.
        candidates = value if isinstance(value, list) else [value]
        return any(self._is_private_ip(str(entry)) for entry in candidates if entry is not None)

    def _is_private_ip(self, ip_str: str) -> bool:
        """Return True if ``ip_str`` parses as a private/special-use IP address."""
        try:
            address = ipaddress.ip_address(ip_str)
            return bool(
                address.is_private
                or address.is_loopback
                or address.is_link_local
                or getattr(address, "is_reserved", False)
            )
        except (ValueError, AttributeError):
            # Text that is not a valid IP address is simply "not private".
            return False
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class IsGlobalMutator(BaseMutator):
    """
    Mutator that checks if an IP address is globally routable.

    Performance Characteristics:
    - In-memory: FAST - Simple IP range calculations
    - OpenSearch: MODERATE - Requires post-processing of all results

    This mutator returns True if the IP is a globally routable address,
    meaning it's not:
    - Private (RFC 1918)
    - Loopback
    - Link-local
    - Multicast
    - Reserved
    - Unspecified
    - For IPv4, inside one of the extra special-purpose blocks listed in
      ``_NON_GLOBAL_IPV4_NETWORKS``

    Used as a filter in queries like: ip | is_global()
    Returns True/False for filtering purposes.

    Example:
        destination_ip | is_global()
    """

    # Extra IPv4 special-purpose blocks that are checked explicitly on top of
    # the ``ipaddress`` property flags. They are expressed as networks so the
    # prefix arithmetic is done by the standard library, not by hand.
    _NON_GLOBAL_IPV4_NETWORKS = (
        ipaddress.ip_network("0.0.0.0/8"),  # "This" network
        ipaddress.ip_network("100.64.0.0/10"),  # Shared address space (CGN)
        ipaddress.ip_network("198.18.0.0/15"),  # Benchmarking
        ipaddress.ip_network("240.0.0.0/4"),  # Reserved (Class E)
    )

    def __init__(self, params: Optional[Dict[str, Any]] = None) -> None:
        """Initialise the mutator and record its performance classes."""
        super().__init__(params)
        self.performance_in_memory = PerformanceClass.FAST
        self.performance_opensearch = PerformanceClass.MODERATE

    def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:
        """
        Check whether the value is a globally routable IP address.

        Args:
            field_name: Name of the field being mutated (not used by this mutator).
            record: Record the value was read from (not used by this mutator).
            value: A single address or a list of addresses; each is passed
                through ``str()`` before parsing.

        Returns:
            False for None. For a list, True if any non-None entry is global.
            Otherwise the result for the single value.
        """
        if value is None:
            return False

        # Handle lists - return True if any IP is global
        if isinstance(value, list):
            return any(self._is_global_ip(str(item)) for item in value if item is not None)

        # Single value
        return self._is_global_ip(str(value))

    def _is_global_ip(self, ip_str: str) -> bool:  # noqa: C901
        """
        Check if a single IP address is globally routable.

        Args:
            ip_str: Textual IPv4 or IPv6 address.

        Returns:
            True when the address parses and is none of the special-use
            categories; False otherwise, including for text that is not a
            valid IP address.
        """
        try:
            ip_obj = ipaddress.ip_address(ip_str)

            # Any special-use flag disqualifies the address.
            if (
                ip_obj.is_private
                or ip_obj.is_loopback
                or ip_obj.is_link_local
                or ip_obj.is_multicast
                or ip_obj.is_unspecified  # 0.0.0.0 or ::
                or getattr(ip_obj, "is_reserved", False)
            ):
                return False

            # IPv4 only: explicit special-purpose blocks. Network membership
            # replaces hand-computed shifted prefixes, which are easy to get
            # wrong and could flag an unrelated public block as non-global.
            if isinstance(ip_obj, ipaddress.IPv4Address) and any(
                ip_obj in network for network in self._NON_GLOBAL_IPV4_NETWORKS
            ):
                return False

            # If it passed all checks, it's global
            return True

        except (ValueError, AttributeError):
            # Not a valid IP address
            return False
|
tql/mutators/security.py
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
"""Security-related mutators for defanging and refanging URLs and indicators."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
|
|
5
|
+
from .base import BaseMutator, append_to_result
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class RefangMutator(BaseMutator):
    """
    Mutator that refangs (un-defangs) URLs and indicators.

    This mutator reverses common defanging patterns to make URLs and
    indicators clickable/active again. It handles these patterns:
    - hxxp:// -> http://, hxxps:// -> https://, fxp:// -> ftp://
      (matched case-insensitively, e.g. hXXp://, HXXPS://)
    - [.] (.) {.} [dot] (dot) {dot} -> .
    - [:] (:) {:} -> :
    - [at] (at) {at} [@] (@) {@} -> @
    - [/] (/) {/} -> /
    - space-padded forms " [.] ", " [dot] ", " [at] ", " [:] " (the padding
      spaces are removed together with the brackets)

    Parameters:
        field: Optional field to store the refanged value
    """

    # Space-padded tokens are handled first so "a [.] b" becomes "a.b" rather
    # than "a . b".
    _SPACED_REPLACEMENTS = (
        (" [.] ", "."),
        (" [dot] ", "."),
        (" [at] ", "@"),
        (" [:] ", ":"),
    )

    # Bracketed tokens, applied in this order: dots, colons, at-signs, slashes.
    _BRACKET_REPLACEMENTS = (
        ("[.]", "."),
        ("(.)", "."),
        ("{.}", "."),
        ("[dot]", "."),
        ("(dot)", "."),
        ("{dot}", "."),
        ("[:]", ":"),
        ("(:)", ":"),
        ("{:}", ":"),
        ("[at]", "@"),
        ("(at)", "@"),
        ("{at}", "@"),
        ("[@]", "@"),
        ("(@)", "@"),
        ("{@}", "@"),
        ("[/]", "/"),
        ("(/)", "/"),
        ("{/}", "/"),
    )

    # Defanged scheme -> real scheme; the patterns are matched case-insensitively.
    _PROTOCOL_REPLACEMENTS = (
        (r"hxxps://", "https://"),
        (r"hxxp://", "http://"),
        (r"fxp://", "ftp://"),
    )

    def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:
        """
        Apply the refang transformation.

        Args:
            field_name: Name of the field being mutated (not used by this mutator).
            record: Record being processed; only touched when the ``field``
                parameter is set.
            value: Value to refang. Strings are refanged. Lists are refanged
                element-wise (non-string elements are kept as-is). int, float
                and bool are converted with ``str()`` first. None and any
                other type pass through unchanged.

        Returns:
            The refanged value. If the ``field`` parameter is set, the refanged
            value is instead handed to ``append_to_result`` under that field
            name and the original ``value`` is returned.
        """
        append_field = self.params.get("field")

        refanged_value: Any
        if value is None:
            refanged_value = None
        elif isinstance(value, str):
            refanged_value = self._refang_string(value)
        elif isinstance(value, list):
            refanged_value = [
                self._refang_string(item) if isinstance(item, str) else item for item in value
            ]
        elif isinstance(value, (int, float, bool)):
            # Scalars are stringified, so the result of this branch is a str.
            refanged_value = self._refang_string(str(value))
        else:
            refanged_value = value

        if append_field:
            append_to_result(record, append_field, refanged_value)
            return value
        return refanged_value

    def _refang_string(self, s: str) -> str:
        """
        Refang a single string.

        Args:
            s: Possibly defanged text.

        Returns:
            The text with all supported defanging patterns reversed.
        """
        # Function-scope import: this module has no top-level ``re`` import.
        import re

        result = s

        for defanged, plain in self._SPACED_REPLACEMENTS:
            result = result.replace(defanged, plain)

        for defanged, plain in self._BRACKET_REPLACEMENTS:
            result = result.replace(defanged, plain)

        # Schemes are restored last so that a scheme whose separator was itself
        # defanged (e.g. "hxxp[:]//") is recognised once the brackets are gone.
        # Matching ignores case so every spelling (Hxxp, hXxP, ...) is covered.
        for pattern, scheme in self._PROTOCOL_REPLACEMENTS:
            result = re.sub(pattern, scheme, result, flags=re.IGNORECASE)

        return result
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class DefangMutator(BaseMutator):
    """
    Mutator that defangs URLs and indicators to make them unclickable.

    This mutator applies common defanging patterns to URLs and indicators
    to prevent accidental clicks or automatic processing:
    - http:// -> hXXp://   (HTTP:// -> HXXP://)
    - https:// -> hXXps:// (HTTPS:// -> HXXPS://)
    - ftp:// -> fXp://     (FTP:// -> FXP://)
    - . -> [.]
    - @ -> [at]
    - :<digits> -> [:]<digits>, only after the scheme of a URL (port numbers)

    Text is processed one whitespace-separated token at a time. A token that
    already contains "[.]" or "[at]" is treated as defanged and left alone.
    Whitespace between tokens is preserved exactly.

    Parameters:
        field: Optional field to store the defanged value
    """

    # Scheme prefixes (after protocol defanging) that mark a token as a URL.
    _DEFANGED_PROTOCOLS = (
        "hXXp://",
        "hXXps://",
        "fXp://",
        "HXXP://",
        "HXXPS://",
        "FXP://",
        "hxxp://",
        "hxxps://",
        "fxp://",  # Already defanged variations
    )

    # Plain scheme -> defanged scheme, applied in this order.
    _PROTOCOL_REPLACEMENTS = (
        ("https://", "hXXps://"),
        ("http://", "hXXp://"),
        ("ftp://", "fXp://"),
        ("HTTPS://", "HXXPS://"),
        ("HTTP://", "HXXP://"),
        ("FTP://", "FXP://"),
    )

    def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:
        """
        Apply the defang transformation.

        Args:
            field_name: Name of the field being mutated (not used by this mutator).
            record: Record being processed; only touched when the ``field``
                parameter is set.
            value: Value to defang. Strings are defanged. Lists are defanged
                element-wise (non-string elements are kept as-is). int, float
                and bool are converted with ``str()`` first. None and any
                other type pass through unchanged.

        Returns:
            The defanged value. If the ``field`` parameter is set, the defanged
            value is instead handed to ``append_to_result`` under that field
            name and the original ``value`` is returned.
        """
        append_field = self.params.get("field")

        defanged_value: Any
        if value is None:
            defanged_value = None
        elif isinstance(value, str):
            defanged_value = self._defang_string(value)
        elif isinstance(value, list):
            defanged_value = [
                self._defang_string(item) if isinstance(item, str) else item for item in value
            ]
        elif isinstance(value, (int, float, bool)):
            # Scalars are stringified, so the result of this branch is a str
            # (e.g. 1.5 becomes "1[.]5").
            defanged_value = self._defang_string(str(value))
        else:
            defanged_value = value

        if append_field:
            append_to_result(record, append_field, defanged_value)
            return value
        return defanged_value

    def _defang_string(self, s: str) -> str:
        """
        Defang a single string.

        Args:
            s: Text that may contain URLs, domains, e-mail addresses, etc.

        Returns:
            The defanged text, with the original whitespace layout intact.
        """
        # Function-scope import (module has no top-level ``re`` import); done
        # once per call instead of once per token.
        import re

        result = s

        # Protocol defanging (done first to avoid double-defanging)
        for plain, defanged in self._PROTOCOL_REPLACEMENTS:
            result = result.replace(plain, defanged)

        # Split on whitespace but keep the separators (capturing group), so that
        # newlines, tabs, repeated spaces and leading/trailing whitespace survive.
        # Tokens sit at even indexes, separators at odd indexes.
        pieces = re.split(r"(\s+)", result)

        for index in range(0, len(pieces), 2):
            token = pieces[index]
            if not token:
                # Empty string produced by leading/trailing whitespace.
                continue

            if token.startswith(self._DEFANGED_PROTOCOLS):
                # URL: keep the scheme, defang only what follows "://".
                protocol, rest = token.split("://", 1)
                if "[.]" not in rest and "[at]" not in rest:
                    rest = rest.replace(".", "[.]")
                    # "@" appears in URLs that carry credentials.
                    rest = rest.replace("@", "[at]")
                    # Only a colon followed by digits (a port) is defanged.
                    rest = re.sub(r":(\d+)", r"[:]\1", rest)
                pieces[index] = f"{protocol}://{rest}"
            elif "[.]" not in token and "[at]" not in token:
                # Non-URL token: defang dots and @ symbols.
                pieces[index] = token.replace(".", "[.]").replace("@", "[at]")
            # else: already defanged, leave as-is

        return "".join(pieces)
|