tellaro-query-language 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/METADATA +24 -1
- {tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/RECORD +27 -27
- tql/core.py +225 -54
- tql/core_components/opensearch_operations.py +415 -99
- tql/core_components/stats_operations.py +11 -1
- tql/evaluator.py +39 -2
- tql/evaluator_components/special_expressions.py +25 -6
- tql/evaluator_components/value_comparison.py +31 -3
- tql/mutator_analyzer.py +640 -242
- tql/mutators/__init__.py +5 -1
- tql/mutators/dns.py +76 -53
- tql/mutators/security.py +101 -100
- tql/mutators/string.py +74 -0
- tql/opensearch_components/field_mapping.py +9 -3
- tql/opensearch_components/lucene_converter.py +12 -0
- tql/opensearch_components/query_converter.py +134 -25
- tql/opensearch_mappings.py +2 -2
- tql/opensearch_stats.py +170 -39
- tql/parser.py +92 -37
- tql/parser_components/ast_builder.py +37 -1
- tql/parser_components/field_extractor.py +9 -1
- tql/parser_components/grammar.py +32 -8
- tql/post_processor.py +489 -31
- tql/stats_evaluator.py +170 -12
- {tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/LICENSE +0 -0
- {tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/WHEEL +0 -0
- {tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/entry_points.txt +0 -0
tql/mutators/__init__.py
CHANGED
|
@@ -26,7 +26,7 @@ from .list import (
|
|
|
26
26
|
)
|
|
27
27
|
from .network import IsGlobalMutator, IsPrivateMutator
|
|
28
28
|
from .security import DefangMutator, RefangMutator
|
|
29
|
-
from .string import LengthMutator, LowercaseMutator, SplitMutator, TrimMutator, UppercaseMutator
|
|
29
|
+
from .string import LengthMutator, LowercaseMutator, ReplaceMutator, SplitMutator, TrimMutator, UppercaseMutator
|
|
30
30
|
|
|
31
31
|
# Maintain backward compatibility
|
|
32
32
|
__all__ = [
|
|
@@ -39,6 +39,7 @@ __all__ = [
|
|
|
39
39
|
"TrimMutator",
|
|
40
40
|
"SplitMutator",
|
|
41
41
|
"LengthMutator",
|
|
42
|
+
"ReplaceMutator",
|
|
42
43
|
# Encoding mutators
|
|
43
44
|
"Base64EncodeMutator",
|
|
44
45
|
"Base64DecodeMutator",
|
|
@@ -82,6 +83,7 @@ ALLOWED_MUTATORS: Dict[str, Optional[Dict[str, type]]] = {
|
|
|
82
83
|
"trim": None,
|
|
83
84
|
"split": {"delimiter": str, "field": str},
|
|
84
85
|
"length": {"field": str},
|
|
86
|
+
"replace": {"find": str, "replace": str, "field": str},
|
|
85
87
|
# URL and security transform mutators
|
|
86
88
|
"refang": {"field": str},
|
|
87
89
|
"defang": {"field": str},
|
|
@@ -166,6 +168,8 @@ def create_mutator(name: str, params: Optional[List[List[Any]]] = None) -> BaseM
|
|
|
166
168
|
return SplitMutator(params_dict)
|
|
167
169
|
elif key == "length":
|
|
168
170
|
return LengthMutator(params_dict)
|
|
171
|
+
elif key == "replace":
|
|
172
|
+
return ReplaceMutator(params_dict)
|
|
169
173
|
elif key == "refang":
|
|
170
174
|
return RefangMutator(params_dict)
|
|
171
175
|
elif key == "defang":
|
tql/mutators/dns.py
CHANGED
|
@@ -31,18 +31,32 @@ class NSLookupMutator(BaseMutator):
|
|
|
31
31
|
- Perform reverse DNS lookups (IP to hostname)
|
|
32
32
|
- Query specific DNS record types
|
|
33
33
|
- Support force lookup to bypass existing data
|
|
34
|
-
- Return
|
|
34
|
+
- Return ECS-compliant DNS data without modifying the original field value
|
|
35
|
+
|
|
36
|
+
Field Storage (ECS-compliant):
|
|
37
|
+
- destination.ip | nslookup → stores at destination.domain
|
|
38
|
+
- source.ip | nslookup → stores at source.domain
|
|
39
|
+
- ip | nslookup → stores at domain
|
|
40
|
+
- Multiple queries store as array of ECS DNS objects
|
|
35
41
|
|
|
36
42
|
Parameters:
|
|
37
43
|
servers: List of DNS server IPs to use (optional)
|
|
38
|
-
|
|
44
|
+
field: Field name to store results (default: auto-detect from field path)
|
|
45
|
+
append_field: Legacy parameter name for field (deprecated)
|
|
39
46
|
force: Force new lookup even if data exists (default: False)
|
|
40
47
|
save: Save enrichment to record (default: True)
|
|
41
48
|
types: List of DNS record types to query (default: auto-detect)
|
|
42
|
-
field: Field name to store results (preferred over append_field)
|
|
43
49
|
|
|
44
|
-
|
|
45
|
-
|
|
50
|
+
Examples:
|
|
51
|
+
# Basic usage with ECS-compliant storage
|
|
52
|
+
destination.ip | nslookup
|
|
53
|
+
source.ip | nslookup
|
|
54
|
+
|
|
55
|
+
# Custom DNS servers
|
|
56
|
+
hostname | nslookup(servers=['8.8.8.8'])
|
|
57
|
+
|
|
58
|
+
# Custom storage location
|
|
59
|
+
ip | nslookup(field='custom.dns_data')
|
|
46
60
|
"""
|
|
47
61
|
|
|
48
62
|
def __init__(self, params: Optional[Dict[str, Any]] = None) -> None:
|
|
@@ -232,17 +246,43 @@ class NSLookupMutator(BaseMutator):
|
|
|
232
246
|
|
|
233
247
|
# Save enrichment if requested
|
|
234
248
|
if save_enrichment:
|
|
235
|
-
#
|
|
249
|
+
# Always store ECS data directly, never use IP addresses as field names
|
|
236
250
|
if len(queries) == 1 and queries[0] in resolved_results:
|
|
237
|
-
#
|
|
251
|
+
# Single query: store the ECS data directly
|
|
238
252
|
append_to_result(record, append_field, resolved_results[queries[0]])
|
|
253
|
+
elif len(queries) > 1:
|
|
254
|
+
# Multiple queries: store as array of ECS results
|
|
255
|
+
results_array = []
|
|
256
|
+
for query in queries:
|
|
257
|
+
if query in resolved_results:
|
|
258
|
+
results_array.append(resolved_results[query])
|
|
259
|
+
append_to_result(record, append_field, results_array)
|
|
239
260
|
else:
|
|
240
|
-
#
|
|
241
|
-
append_to_result(record, append_field,
|
|
242
|
-
|
|
243
|
-
# For enrichment
|
|
244
|
-
#
|
|
245
|
-
|
|
261
|
+
# No results
|
|
262
|
+
append_to_result(record, append_field, None)
|
|
263
|
+
|
|
264
|
+
# For enrichment mutators, return data for comparison
|
|
265
|
+
# The full enrichment data is stored via append_to_result above
|
|
266
|
+
# Return value is used for field comparison (e.g., contains 'dns.google')
|
|
267
|
+
|
|
268
|
+
if len(queries) == 1 and queries[0] in resolved_results:
|
|
269
|
+
# Single query: return the first answer for comparison
|
|
270
|
+
dns_data = resolved_results[queries[0]]
|
|
271
|
+
answers = dns_data.get("answers", [])
|
|
272
|
+
return answers[0] if answers else value # Return first answer or original value
|
|
273
|
+
elif len(queries) > 1:
|
|
274
|
+
# Multiple queries: return array of first answers
|
|
275
|
+
first_answers = []
|
|
276
|
+
for query in queries:
|
|
277
|
+
if query in resolved_results:
|
|
278
|
+
dns_data = resolved_results[query]
|
|
279
|
+
answers = dns_data.get("answers", [])
|
|
280
|
+
if answers:
|
|
281
|
+
first_answers.append(answers[0])
|
|
282
|
+
return first_answers if first_answers else value
|
|
283
|
+
else:
|
|
284
|
+
# No results: return original value
|
|
285
|
+
return value
|
|
246
286
|
|
|
247
287
|
def _format_dns_ecs( # noqa: C901
|
|
248
288
|
self, query_value: str, records: List[Dict[str, Any]], query_types: List[str]
|
|
@@ -257,60 +297,43 @@ class NSLookupMutator(BaseMutator):
|
|
|
257
297
|
Returns:
|
|
258
298
|
ECS-compliant DNS data structure
|
|
259
299
|
"""
|
|
260
|
-
#
|
|
300
|
+
# Extract answers as simple array of values
|
|
301
|
+
answers = []
|
|
302
|
+
ttls = []
|
|
303
|
+
types = []
|
|
304
|
+
|
|
305
|
+
for record in records:
|
|
306
|
+
data = record.get("data", "")
|
|
307
|
+
if data:
|
|
308
|
+
answers.append(data)
|
|
309
|
+
ttls.append(record.get("ttl", 0))
|
|
310
|
+
types.append(record.get("type", ""))
|
|
311
|
+
|
|
312
|
+
# Build clean ECS structure
|
|
261
313
|
ecs_data = {
|
|
262
314
|
"question": {"name": query_value, "type": query_types[0] if query_types else "A"}, # Primary query type
|
|
263
|
-
"answers":
|
|
315
|
+
"answers": answers, # Simple array of answer values
|
|
264
316
|
"response_code": "NOERROR" if records else "NXDOMAIN",
|
|
265
317
|
}
|
|
266
318
|
|
|
267
|
-
#
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
mx_records = []
|
|
271
|
-
txt_records = []
|
|
319
|
+
# Add TTLs if we have them (optional field)
|
|
320
|
+
if ttls:
|
|
321
|
+
ecs_data["ttl"] = ttls
|
|
272
322
|
|
|
323
|
+
# Add types if they vary (optional field)
|
|
324
|
+
if types and len(set(types)) > 1:
|
|
325
|
+
ecs_data["types"] = types
|
|
326
|
+
|
|
327
|
+
# Extract resolved IPs for ECS standard field
|
|
328
|
+
resolved_ips = []
|
|
273
329
|
for record in records:
|
|
274
330
|
record_type = record.get("type", "")
|
|
275
331
|
data = record.get("data", "")
|
|
276
|
-
|
|
277
332
|
if record_type in ["A", "AAAA"] and data:
|
|
278
333
|
resolved_ips.append(data)
|
|
279
|
-
elif record_type == "PTR" and data:
|
|
280
|
-
hostnames.append(data)
|
|
281
|
-
elif record_type == "CNAME" and data:
|
|
282
|
-
hostnames.append(data)
|
|
283
|
-
elif record_type == "MX" and data:
|
|
284
|
-
mx_records.append(data)
|
|
285
|
-
elif record_type == "TXT" and data:
|
|
286
|
-
txt_records.append(data)
|
|
287
334
|
|
|
288
335
|
# Add resolved_ip array (ECS standard field)
|
|
289
336
|
if resolved_ips:
|
|
290
337
|
ecs_data["resolved_ip"] = resolved_ips
|
|
291
338
|
|
|
292
|
-
# Add convenience fields for easier access
|
|
293
|
-
if hostnames:
|
|
294
|
-
ecs_data["hostname"] = hostnames[0] # Single hostname for simple access
|
|
295
|
-
ecs_data["hostnames"] = hostnames # Array of all hostnames
|
|
296
|
-
|
|
297
|
-
# Add record type specific arrays for convenience
|
|
298
|
-
if resolved_ips:
|
|
299
|
-
# Separate IPv4 and IPv6
|
|
300
|
-
ipv4 = [ip for ip in resolved_ips if ":" not in ip]
|
|
301
|
-
ipv6 = [ip for ip in resolved_ips if ":" in ip]
|
|
302
|
-
if ipv4:
|
|
303
|
-
ecs_data["a"] = ipv4
|
|
304
|
-
if ipv6:
|
|
305
|
-
ecs_data["aaaa"] = ipv6
|
|
306
|
-
|
|
307
|
-
if hostnames and any(r.get("type") == "PTR" for r in records):
|
|
308
|
-
ecs_data["ptr"] = hostnames[0] # Backward compatibility
|
|
309
|
-
|
|
310
|
-
if mx_records:
|
|
311
|
-
ecs_data["mx"] = mx_records
|
|
312
|
-
|
|
313
|
-
if txt_records:
|
|
314
|
-
ecs_data["txt"] = txt_records
|
|
315
|
-
|
|
316
339
|
return ecs_data
|
tql/mutators/security.py
CHANGED
|
@@ -60,53 +60,42 @@ class RefangMutator(BaseMutator):
|
|
|
60
60
|
|
|
61
61
|
def _refang_string(self, s: str) -> str:
|
|
62
62
|
"""Refang a single string."""
|
|
63
|
+
import re
|
|
64
|
+
|
|
63
65
|
result = s
|
|
64
66
|
|
|
65
|
-
# Apply replacements
|
|
66
|
-
#
|
|
67
|
-
|
|
68
|
-
result =
|
|
69
|
-
result =
|
|
70
|
-
result =
|
|
71
|
-
|
|
72
|
-
#
|
|
73
|
-
result =
|
|
74
|
-
result =
|
|
75
|
-
result =
|
|
76
|
-
result =
|
|
77
|
-
result =
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
result =
|
|
81
|
-
result =
|
|
82
|
-
result =
|
|
83
|
-
result =
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
result =
|
|
88
|
-
result =
|
|
89
|
-
result =
|
|
90
|
-
result =
|
|
91
|
-
result =
|
|
92
|
-
|
|
93
|
-
#
|
|
94
|
-
result =
|
|
95
|
-
result =
|
|
96
|
-
result =
|
|
97
|
-
|
|
98
|
-
# At symbol defanging
|
|
99
|
-
result = result.replace("[at]", "@")
|
|
100
|
-
result = result.replace("(at)", "@")
|
|
101
|
-
result = result.replace("{at}", "@")
|
|
102
|
-
result = result.replace("[@]", "@")
|
|
103
|
-
result = result.replace("(@)", "@")
|
|
104
|
-
result = result.replace("{@}", "@")
|
|
105
|
-
|
|
106
|
-
# Slash defanging
|
|
107
|
-
result = result.replace("[/]", "/")
|
|
108
|
-
result = result.replace("(/)", "/")
|
|
109
|
-
result = result.replace("{/}", "/")
|
|
67
|
+
# Apply replacements for common defanging patterns
|
|
68
|
+
# Handle various protocol defanging patterns (case insensitive)
|
|
69
|
+
# Important: Check for 'ps' suffix first to avoid false matches
|
|
70
|
+
result = re.sub(r"h[xX]{1,2}ps://", "https://", result, flags=re.IGNORECASE)
|
|
71
|
+
result = re.sub(r"h[xX]{1,2}p://", "http://", result, flags=re.IGNORECASE)
|
|
72
|
+
result = re.sub(r"f[xX]p://", "ftp://", result, flags=re.IGNORECASE)
|
|
73
|
+
|
|
74
|
+
# Handle bracketed replacements with optional spaces
|
|
75
|
+
result = re.sub(r"\s*\[\.\]\s*", ".", result)
|
|
76
|
+
result = re.sub(r"\s*\[:\]\s*", ":", result)
|
|
77
|
+
result = re.sub(r"\s*\[at\]\s*", "@", result, flags=re.IGNORECASE)
|
|
78
|
+
result = re.sub(r"\s*\[@\]\s*", "@", result)
|
|
79
|
+
result = re.sub(r"\s*\[/\]\s*", "/", result)
|
|
80
|
+
|
|
81
|
+
# Handle parentheses replacements
|
|
82
|
+
result = re.sub(r"\s*\(\.\)\s*", ".", result)
|
|
83
|
+
result = re.sub(r"\s*\(:\)\s*", ":", result)
|
|
84
|
+
result = re.sub(r"\s*\(at\)\s*", "@", result, flags=re.IGNORECASE)
|
|
85
|
+
result = re.sub(r"\s*\(@\)\s*", "@", result)
|
|
86
|
+
result = re.sub(r"\s*\(/\)\s*", "/", result)
|
|
87
|
+
|
|
88
|
+
# Handle braces replacements
|
|
89
|
+
result = re.sub(r"\s*\{\.\}\s*", ".", result)
|
|
90
|
+
result = re.sub(r"\s*\{:\}\s*", ":", result)
|
|
91
|
+
result = re.sub(r"\s*\{at\}\s*", "@", result, flags=re.IGNORECASE)
|
|
92
|
+
result = re.sub(r"\s*\{@\}\s*", "@", result)
|
|
93
|
+
result = re.sub(r"\s*\{/\}\s*", "/", result)
|
|
94
|
+
|
|
95
|
+
# Handle word replacements with optional brackets/parentheses/braces
|
|
96
|
+
result = re.sub(r"\s*\[dot\]\s*", ".", result, flags=re.IGNORECASE)
|
|
97
|
+
result = re.sub(r"\s*\(dot\)\s*", ".", result, flags=re.IGNORECASE)
|
|
98
|
+
result = re.sub(r"\s*\{dot\}\s*", ".", result, flags=re.IGNORECASE)
|
|
110
99
|
|
|
111
100
|
return result
|
|
112
101
|
|
|
@@ -161,65 +150,77 @@ class DefangMutator(BaseMutator):
|
|
|
161
150
|
# Return the defanged value directly
|
|
162
151
|
return defanged_value
|
|
163
152
|
|
|
164
|
-
def _defang_string(self, s: str) -> str:
|
|
153
|
+
def _defang_string(self, s: str) -> str: # noqa: C901
|
|
165
154
|
"""Defang a single string."""
|
|
166
|
-
|
|
155
|
+
import re
|
|
156
|
+
|
|
167
157
|
result = s
|
|
168
158
|
|
|
169
|
-
#
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
rest = rest.replace(".", "[.]")
|
|
159
|
+
# Check if fully defanged to avoid double-defanging
|
|
160
|
+
# Only return early if all components are already defanged
|
|
161
|
+
has_defanged_protocol = "hxxp" in result.lower() or "fxp" in result.lower()
|
|
162
|
+
has_defanged_dots = "[.]" in result
|
|
163
|
+
has_defanged_at = "[at]" in result
|
|
164
|
+
|
|
165
|
+
# If it's a URL with protocol, check if dots are defanged
|
|
166
|
+
if has_defanged_protocol and "://" in result:
|
|
167
|
+
# Extract the part after protocol
|
|
168
|
+
_, after_protocol = result.split("://", 1)
|
|
169
|
+
# If dots in the URL part are already defanged, return as-is
|
|
170
|
+
if "." not in after_protocol or has_defanged_dots:
|
|
171
|
+
return result
|
|
172
|
+
# For non-URLs, if already has defanged components, return
|
|
173
|
+
elif has_defanged_dots and has_defanged_at:
|
|
174
|
+
return result
|
|
175
|
+
|
|
176
|
+
# First, replace protocols (case-insensitive) with lowercase hxxp/hxxps/fxp
|
|
177
|
+
result = re.sub(r"https://", "hxxps://", result, flags=re.IGNORECASE)
|
|
178
|
+
result = re.sub(r"http://", "hxxp://", result, flags=re.IGNORECASE)
|
|
179
|
+
result = re.sub(r"ftp://", "fxp://", result, flags=re.IGNORECASE)
|
|
180
|
+
|
|
181
|
+
# Split the string to process URLs, emails, and domains separately
|
|
182
|
+
# Match URLs first since they're more specific
|
|
183
|
+
url_pattern = r"((?:hxxps?|fxp|https?|ftp)://[^\s]+)"
|
|
184
|
+
parts = re.split(url_pattern, result)
|
|
185
|
+
|
|
186
|
+
defanged_parts = []
|
|
187
|
+
for i, part in enumerate(parts):
|
|
188
|
+
if i % 2 == 1: # This is a URL match
|
|
189
|
+
# For URLs, defang the domain part only
|
|
190
|
+
if "://" in part:
|
|
191
|
+
protocol, rest = part.split("://", 1)
|
|
192
|
+
# Defang dots in the domain/path (avoid double-defanging)
|
|
193
|
+
if "[.]" not in rest:
|
|
194
|
+
rest = rest.replace(".", "[.]")
|
|
206
195
|
# Defang @ if present (for URLs with auth)
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
#
|
|
210
|
-
import re
|
|
211
|
-
|
|
196
|
+
if "[at]" not in rest:
|
|
197
|
+
rest = rest.replace("@", "[at]")
|
|
198
|
+
# Defang colons in port numbers (e.g., :8080)
|
|
212
199
|
rest = re.sub(r":(\d+)", r"[:]\1", rest)
|
|
213
|
-
|
|
214
|
-
else:
|
|
215
|
-
# For non-URL tokens, defang dots and @ symbols
|
|
216
|
-
# But avoid double-defanging
|
|
217
|
-
if "[.]" not in token and "[at]" not in token:
|
|
218
|
-
defanged = token.replace(".", "[.]")
|
|
219
|
-
defanged = defanged.replace("@", "[at]")
|
|
220
|
-
defanged_tokens.append(defanged)
|
|
200
|
+
defanged_parts.append(f"{protocol}://{rest}")
|
|
221
201
|
else:
|
|
222
|
-
|
|
223
|
-
|
|
202
|
+
defanged_parts.append(part)
|
|
203
|
+
else:
|
|
204
|
+
# For non-URL text, handle email addresses and domain patterns
|
|
205
|
+
# First, handle email addresses
|
|
206
|
+
email_pattern = r"([a-zA-Z0-9._%+-]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})"
|
|
207
|
+
part = re.sub(email_pattern, lambda m: f"{m.group(1)}[at]{m.group(2).replace('.', '[.]')}", part) # type: ignore[arg-type, str-bytes-safe]
|
|
208
|
+
|
|
209
|
+
# Then handle standalone IP addresses
|
|
210
|
+
ip_pattern = r"\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b"
|
|
211
|
+
part = re.sub(ip_pattern, lambda m: m.group(0).replace(".", "[.]"), part) # type: ignore[arg-type]
|
|
212
|
+
|
|
213
|
+
# Finally handle standalone domain patterns (but not IPs)
|
|
214
|
+
domain_pattern = r"\b([a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)+)\b"
|
|
215
|
+
|
|
216
|
+
def defang_domain(match):
|
|
217
|
+
domain = match.group(0)
|
|
218
|
+
# Only defang if not already defanged and not an IP address
|
|
219
|
+
if "[.]" not in domain and not re.match(r"^\d+\.\d+\.\d+\.\d+$", domain):
|
|
220
|
+
return domain.replace(".", "[.]")
|
|
221
|
+
return domain
|
|
222
|
+
|
|
223
|
+
part = re.sub(domain_pattern, defang_domain, part)
|
|
224
|
+
defanged_parts.append(part)
|
|
224
225
|
|
|
225
|
-
return "
|
|
226
|
+
return "".join(defanged_parts)
|
tql/mutators/string.py
CHANGED
|
@@ -163,3 +163,77 @@ class LengthMutator(BaseMutator):
|
|
|
163
163
|
else:
|
|
164
164
|
# Return the length value directly
|
|
165
165
|
return length_value
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class ReplaceMutator(BaseMutator):
|
|
169
|
+
"""Mutator that replaces all occurrences of a string with another string.
|
|
170
|
+
|
|
171
|
+
Performance Characteristics:
|
|
172
|
+
- In-memory: FAST - Simple string operation with minimal overhead
|
|
173
|
+
- OpenSearch: MODERATE - Requires post-processing of all results
|
|
174
|
+
|
|
175
|
+
Parameters:
|
|
176
|
+
- find: The string to find (required)
|
|
177
|
+
- replace: The string to replace with (required)
|
|
178
|
+
- field: Optional field to append result to
|
|
179
|
+
|
|
180
|
+
Examples:
|
|
181
|
+
# Replace all occurrences
|
|
182
|
+
field | replace(find='old', replace='new')
|
|
183
|
+
|
|
184
|
+
# Use as a filter
|
|
185
|
+
field | replace(find='error', replace='warning') contains 'warning'
|
|
186
|
+
|
|
187
|
+
# Append to another field
|
|
188
|
+
field | replace(find='/', replace='_', field='sanitized_field')
|
|
189
|
+
"""
|
|
190
|
+
|
|
191
|
+
def __init__(self, params: Optional[Dict[str, Any]] = None) -> None:
|
|
192
|
+
super().__init__(params)
|
|
193
|
+
self.performance_in_memory = PerformanceClass.FAST
|
|
194
|
+
self.performance_opensearch = PerformanceClass.MODERATE
|
|
195
|
+
|
|
196
|
+
# Validate required parameters
|
|
197
|
+
if not params:
|
|
198
|
+
raise ValueError("Replace mutator requires 'find' and 'replace' parameters")
|
|
199
|
+
if "find" not in params:
|
|
200
|
+
raise ValueError("Replace mutator requires 'find' parameter")
|
|
201
|
+
if "replace" not in params:
|
|
202
|
+
raise ValueError("Replace mutator requires 'replace' parameter")
|
|
203
|
+
|
|
204
|
+
def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:
|
|
205
|
+
"""Apply the replace transformation."""
|
|
206
|
+
find_str = str(self.params["find"])
|
|
207
|
+
replace_str = str(self.params["replace"])
|
|
208
|
+
append_field = self.params.get("field")
|
|
209
|
+
|
|
210
|
+
# Perform the replace operation
|
|
211
|
+
result: Any # Declare result with Any type to handle different types
|
|
212
|
+
if value is None:
|
|
213
|
+
# Handle None - return as is
|
|
214
|
+
result = value
|
|
215
|
+
elif isinstance(value, str):
|
|
216
|
+
result = value.replace(find_str, replace_str)
|
|
217
|
+
elif isinstance(value, (list, tuple)):
|
|
218
|
+
# Apply replace to each string element in the array
|
|
219
|
+
result = []
|
|
220
|
+
for item in value:
|
|
221
|
+
if isinstance(item, str):
|
|
222
|
+
result.append(item.replace(find_str, replace_str))
|
|
223
|
+
else:
|
|
224
|
+
# Keep non-string items as-is
|
|
225
|
+
result.append(item)
|
|
226
|
+
elif isinstance(value, (int, float, bool)):
|
|
227
|
+
# Convert to string first, then replace, then keep as string
|
|
228
|
+
result = str(value).replace(find_str, replace_str)
|
|
229
|
+
else:
|
|
230
|
+
# For other types, return as-is
|
|
231
|
+
result = value
|
|
232
|
+
|
|
233
|
+
# If append_field is specified, add to record and return original value
|
|
234
|
+
if append_field:
|
|
235
|
+
append_to_result(record, append_field, result)
|
|
236
|
+
return value
|
|
237
|
+
else:
|
|
238
|
+
# Return the replaced result directly
|
|
239
|
+
return result
|
|
@@ -67,7 +67,9 @@ class FieldMapping:
|
|
|
67
67
|
self.field_types[self.base_field_name] = "keyword"
|
|
68
68
|
elif base_type == "text":
|
|
69
69
|
analyzer = mapping_info.get("analyzer", "standard")
|
|
70
|
-
|
|
70
|
+
# If analyzer is a dict (custom analyzer), use "custom" as key
|
|
71
|
+
analyzer_key = "custom" if isinstance(analyzer, dict) else analyzer
|
|
72
|
+
self.text_fields[analyzer_key] = self.base_field_name
|
|
71
73
|
self.field_types[self.base_field_name] = "text"
|
|
72
74
|
else:
|
|
73
75
|
self.field_types[self.base_field_name] = base_type
|
|
@@ -85,7 +87,9 @@ class FieldMapping:
|
|
|
85
87
|
self.field_types[field_path] = "keyword"
|
|
86
88
|
elif subfield_type == "text":
|
|
87
89
|
analyzer = subfield_config.get("analyzer", "standard")
|
|
88
|
-
|
|
90
|
+
# If analyzer is a dict (custom analyzer), use "custom" as key
|
|
91
|
+
analyzer_key = "custom" if isinstance(analyzer, dict) else analyzer
|
|
92
|
+
self.text_fields[analyzer_key] = field_path
|
|
89
93
|
self.field_types[field_path] = "text"
|
|
90
94
|
elif subfield_type:
|
|
91
95
|
self.field_types[field_path] = subfield_type
|
|
@@ -114,7 +118,9 @@ class FieldMapping:
|
|
|
114
118
|
if field_type == "keyword":
|
|
115
119
|
self.keyword_field = field_name
|
|
116
120
|
elif field_type == "text":
|
|
117
|
-
|
|
121
|
+
# If analyzer is a dict (custom analyzer), use "custom" as key
|
|
122
|
+
analyzer_key = "custom" if isinstance(analyzer, dict) else analyzer
|
|
123
|
+
self.text_fields[analyzer_key] = field_name
|
|
118
124
|
else:
|
|
119
125
|
# Legacy format: "keyword" or "text" or other types
|
|
120
126
|
field_type = field_config
|
|
@@ -42,6 +42,18 @@ class LuceneConverter:
|
|
|
42
42
|
return self._convert_unary_op_to_lucene(node)
|
|
43
43
|
elif node_type == "collection_op":
|
|
44
44
|
return self._convert_collection_op_to_lucene(node)
|
|
45
|
+
elif node_type == "query_with_stats":
|
|
46
|
+
# For query_with_stats, only convert the filter part to Lucene
|
|
47
|
+
# The stats part is handled by the stats engine
|
|
48
|
+
filter_node = node.get("filter")
|
|
49
|
+
if filter_node:
|
|
50
|
+
return self._convert_node_to_lucene(filter_node)
|
|
51
|
+
else:
|
|
52
|
+
return "*:*"
|
|
53
|
+
elif node_type == "stats_expr":
|
|
54
|
+
# Pure stats queries match all documents in Lucene
|
|
55
|
+
# The aggregations are handled by the stats engine
|
|
56
|
+
return "*:*"
|
|
45
57
|
|
|
46
58
|
raise TQLValidationError(f"Unknown node type: {node}")
|
|
47
59
|
|