tellaro-query-language 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
- tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
- tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
- tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
- tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
- tql/__init__.py +47 -0
- tql/analyzer.py +385 -0
- tql/cache/__init__.py +7 -0
- tql/cache/base.py +25 -0
- tql/cache/memory.py +63 -0
- tql/cache/redis.py +68 -0
- tql/core.py +929 -0
- tql/core_components/README.md +92 -0
- tql/core_components/__init__.py +20 -0
- tql/core_components/file_operations.py +113 -0
- tql/core_components/opensearch_operations.py +869 -0
- tql/core_components/stats_operations.py +200 -0
- tql/core_components/validation_operations.py +599 -0
- tql/evaluator.py +379 -0
- tql/evaluator_components/README.md +131 -0
- tql/evaluator_components/__init__.py +17 -0
- tql/evaluator_components/field_access.py +176 -0
- tql/evaluator_components/special_expressions.py +296 -0
- tql/evaluator_components/value_comparison.py +315 -0
- tql/exceptions.py +160 -0
- tql/geoip_normalizer.py +233 -0
- tql/mutator_analyzer.py +830 -0
- tql/mutators/__init__.py +222 -0
- tql/mutators/base.py +78 -0
- tql/mutators/dns.py +316 -0
- tql/mutators/encoding.py +218 -0
- tql/mutators/geo.py +363 -0
- tql/mutators/list.py +212 -0
- tql/mutators/network.py +163 -0
- tql/mutators/security.py +225 -0
- tql/mutators/string.py +165 -0
- tql/opensearch.py +78 -0
- tql/opensearch_components/README.md +130 -0
- tql/opensearch_components/__init__.py +17 -0
- tql/opensearch_components/field_mapping.py +399 -0
- tql/opensearch_components/lucene_converter.py +305 -0
- tql/opensearch_components/query_converter.py +775 -0
- tql/opensearch_mappings.py +309 -0
- tql/opensearch_stats.py +451 -0
- tql/parser.py +1363 -0
- tql/parser_components/README.md +72 -0
- tql/parser_components/__init__.py +20 -0
- tql/parser_components/ast_builder.py +162 -0
- tql/parser_components/error_analyzer.py +101 -0
- tql/parser_components/field_extractor.py +112 -0
- tql/parser_components/grammar.py +473 -0
- tql/post_processor.py +737 -0
- tql/scripts.py +124 -0
- tql/stats_evaluator.py +444 -0
- tql/stats_transformer.py +184 -0
- tql/validators.py +110 -0
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
"""Lucene query string converter for OpenSearch backend.
|
|
2
|
+
|
|
3
|
+
This module handles conversion of TQL AST to Lucene query strings.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Any, Dict, Optional, Tuple
|
|
7
|
+
|
|
8
|
+
from ..exceptions import TQLUnsupportedOperationError, TQLValidationError
|
|
9
|
+
from .field_mapping import FieldMapping
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class LuceneConverter:
    """Handles conversion of TQL AST to Lucene query strings."""

    def __init__(self, field_mappings: Dict[str, "FieldMapping"], simple_mappings: Dict[str, str]):
        """Initialize Lucene converter.

        Args:
            field_mappings: Intelligent field mappings (FieldMapping objects that can
                select a concrete sub-field per operator, e.g. a ``.keyword`` variant).
            simple_mappings: Simple TQL-name -> index-field-name mappings.
        """
        self.intelligent_mappings = field_mappings
        self.simple_mappings = simple_mappings

    def convert_lucene(self, ast: Dict[str, Any]) -> str:
        """Convert a TQL AST to a Lucene query string."""
        return self._convert_node_to_lucene(ast)

    def _convert_node_to_lucene(self, node: Any) -> str:
        """Dispatch a single AST node to the converter for its ``type``.

        Args:
            node: AST node; expected to be a dict with a ``type`` key of
                ``comparison``, ``logical_op``, ``unary_op`` or ``collection_op``.

        Returns:
            The Lucene query string for this subtree.

        Raises:
            TQLValidationError: If the node is not a dict or has an unknown type.
        """
        if isinstance(node, dict):
            node_type = node.get("type")

            if node_type == "comparison":
                return self._convert_comparison_to_lucene(node)
            elif node_type == "logical_op":
                return self._convert_logical_op_to_lucene(node)
            elif node_type == "unary_op":
                return self._convert_unary_op_to_lucene(node)
            elif node_type == "collection_op":
                return self._convert_collection_op_to_lucene(node)

        raise TQLValidationError(f"Unknown node type: {node}")

    def _convert_comparison_to_lucene(self, node: Dict[str, Any]) -> str:  # noqa: C901
        """Convert a comparison operation to a Lucene query string.

        Args:
            node: Comparison node with ``field``, ``operator`` and ``value`` keys.

        Returns:
            A Lucene clause such as ``field:value``, ``field:>=10`` or
            ``field:[a TO b]``.

        Raises:
            TQLValidationError: If a ``between`` value is not a two-element list.
            TQLUnsupportedOperationError: If the operator has no Lucene equivalent.
        """
        field_name = node["field"]
        operator = node["operator"]
        value = node["value"]

        # Resolve the concrete index field (could be enhanced to extract a
        # preferred analyzer from query context).
        lucene_field, use_wildcard = self._resolve_field_name(field_name, operator)

        # Escape special characters in string values; other types pass through str().
        escaped_value = self._escape_lucene_value(value) if isinstance(value, str) else str(value)

        # Convert operator to Lucene syntax.
        if operator in ("eq", "="):
            return f"{lucene_field}:{escaped_value}"
        elif operator in ("ne", "!="):
            return f"NOT {lucene_field}:{escaped_value}"
        elif operator in ("gt", ">"):
            return f"{lucene_field}:>{escaped_value}"
        elif operator in ("gte", ">="):
            return f"{lucene_field}:>={escaped_value}"
        elif operator in ("lt", "<"):
            return f"{lucene_field}:<{escaped_value}"
        elif operator in ("lte", "<="):
            return f"{lucene_field}:<={escaped_value}"
        elif operator == "contains":
            if use_wildcard:
                # Keyword fields: substring match via wildcards.
                return f"{lucene_field}:*{escaped_value}*"
            # Text fields: quoted phrase match.
            return f'{lucene_field}:"{escaped_value}"'
        elif operator == "startswith":
            return f"{lucene_field}:{escaped_value}*"
        elif operator == "endswith":
            return f"{lucene_field}:*{escaped_value}"
        elif operator == "in":
            if isinstance(value, list):
                escaped_values = [self._escape_lucene_value(str(v)) for v in value]
                return f"{lucene_field}:({' OR '.join(escaped_values)})"
            return f"{lucene_field}:{escaped_value}"
        elif operator == "regexp":
            return f"{lucene_field}:/{escaped_value}/"
        elif operator == "exists":
            return f"_exists_:{lucene_field}"
        elif operator == "is":
            if value is None:
                # "is null" -> field must be absent.
                return f"NOT _exists_:{lucene_field}"
            return f"{lucene_field}:{escaped_value}"
        elif operator == "between":
            if isinstance(value, list) and len(value) == 2:
                # Normalize bool/null string forms; numeric values stay numeric.
                val1 = self._convert_value(value[0])
                val2 = self._convert_value(value[1])

                if isinstance(val1, (int, float)) and isinstance(val2, (int, float)):
                    # Numeric bounds may be given in either order.
                    lower, upper = min(val1, val2), max(val1, val2)
                else:
                    # Non-numeric (e.g. dates/strings): order them if comparable,
                    # otherwise keep the original order.
                    try:
                        lower, upper = (val2, val1) if val1 > val2 else (val1, val2)
                    except TypeError:
                        lower, upper = value[0], value[1]

                # Range values are emitted unescaped, matching Lucene range syntax.
                return f"{lucene_field}:[{lower} TO {upper}]"
            raise TQLValidationError(f"Between operator requires a list with two values, got: {value}")
        elif operator == "cidr":
            return f"{lucene_field}:{escaped_value}"
        else:
            raise TQLUnsupportedOperationError(f"Operator '{operator}' not supported for Lucene")

    def _convert_logical_op_to_lucene(self, node: Dict[str, Any]) -> str:
        """Convert a logical operation (and/or) to a Lucene query string.

        Raises:
            TQLUnsupportedOperationError: If the operator is not ``and``/``or``.
        """
        operator = node["operator"]
        left_query = self._convert_node_to_lucene(node["left"])
        right_query = self._convert_node_to_lucene(node["right"])

        if operator == "and":
            return f"({left_query}) AND ({right_query})"
        elif operator == "or":
            return f"({left_query}) OR ({right_query})"
        raise TQLUnsupportedOperationError(f"Logical operator '{operator}' not supported for Lucene")

    def _convert_unary_op_to_lucene(self, node: Dict[str, Any]) -> str:
        """Convert a unary operation (not) to a Lucene query string.

        Raises:
            TQLUnsupportedOperationError: If the operator is not ``not``.
        """
        operator = node["operator"]
        operand_query = self._convert_node_to_lucene(node["operand"])

        if operator == "not":
            return f"NOT ({operand_query})"
        raise TQLUnsupportedOperationError(f"Unary operator '{operator}' not supported for Lucene")

    def _convert_collection_op_to_lucene(self, node: Dict[str, Any]) -> str:  # noqa: C901
        """Convert a collection operation (any/all) to a Lucene query string.

        Args:
            node: Collection node with ``operator`` (any/all), ``field``,
                ``comparison_operator`` and ``value`` keys.

        Raises:
            TQLUnsupportedOperationError: If the comparison or collection
                operator has no Lucene equivalent.
        """
        operator = node["operator"]
        field_name = node["field"]
        comparison_operator = node["comparison_operator"]
        value = node["value"]

        # Resolve the concrete index field for the inner comparison.
        lucene_field, use_wildcard = self._resolve_field_name(field_name, comparison_operator)

        # Escape string values; other types pass through str().
        escaped_value = self._escape_lucene_value(value) if isinstance(value, str) else str(value)

        # Build the inner comparison clause.
        if comparison_operator in ("eq", "="):
            comparison = f"{lucene_field}:{escaped_value}"
        elif comparison_operator in ("ne", "!="):
            comparison = f"NOT {lucene_field}:{escaped_value}"
        elif comparison_operator in ("gt", ">"):
            comparison = f"{lucene_field}:>{escaped_value}"
        elif comparison_operator in ("gte", ">="):
            comparison = f"{lucene_field}:>={escaped_value}"
        elif comparison_operator in ("lt", "<"):
            comparison = f"{lucene_field}:<{escaped_value}"
        elif comparison_operator in ("lte", "<="):
            comparison = f"{lucene_field}:<={escaped_value}"
        elif comparison_operator == "contains":
            if use_wildcard:
                comparison = f"{lucene_field}:*{escaped_value}*"
            else:
                comparison = f'{lucene_field}:"{escaped_value}"'
        elif comparison_operator == "startswith":
            comparison = f"{lucene_field}:{escaped_value}*"
        elif comparison_operator == "endswith":
            comparison = f"{lucene_field}:*{escaped_value}"
        elif comparison_operator == "regexp":
            comparison = f"{lucene_field}:/{escaped_value}/"
        elif comparison_operator == "in":
            if isinstance(value, list):
                escaped_values = [self._escape_lucene_value(str(v)) for v in value]
                comparison = f"{lucene_field}:({' OR '.join(escaped_values)})"
            else:
                comparison = f"{lucene_field}:{escaped_value}"
        else:
            raise TQLUnsupportedOperationError(
                f"Operator '{comparison_operator}' not supported for collection operators in Lucene"
            )

        if operator == "any":
            # ANY: a match on any element satisfies the clause directly.
            return comparison
        elif operator == "all":
            # ALL expressed as "no element fails the comparison" (the field must
            # exist and there must be no non-matching element).
            return f"NOT (_exists_:{lucene_field} AND NOT ({comparison}))"
        raise TQLUnsupportedOperationError(f"Collection operator '{operator}' not supported for Lucene")

    def _escape_lucene_value(self, value: str) -> str:
        """Escape special characters in Lucene query values.

        The backslash is escaped FIRST; otherwise the backslashes inserted while
        escaping the other characters would themselves be escaped on a later
        pass, producing doubly-escaped output (e.g. ``a+b`` -> ``a\\\\+b``).

        Args:
            value: Raw string value from the TQL query.

        Returns:
            The escaped value, wrapped in double quotes if it contains spaces.
        """
        # Lucene special characters: + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ /
        # ("&&" / "||" are covered by escaping the single "&" / "|" characters.)
        special_chars = [
            "\\",  # must come first -- see docstring
            "+",
            "-",
            "=",
            "&",
            "|",
            ">",
            "<",
            "!",
            "(",
            ")",
            "{",
            "}",
            "[",
            "]",
            "^",
            '"',
            "~",
            "*",
            "?",
            ":",
            "/",
        ]

        escaped = value
        for char in special_chars:
            escaped = escaped.replace(char, f"\\{char}")

        # Quote the value if it contains spaces so it parses as a single term.
        if " " in escaped:
            escaped = f'"{escaped}"'

        return escaped

    def _resolve_field_name(
        self, field_name: str, operator: str, preferred_analyzer: Optional[str] = None
    ) -> Tuple[str, bool]:
        """Resolve field name based on mappings and operator.

        Args:
            field_name: The TQL field name.
            operator: The operator being used.
            preferred_analyzer: Preferred analyzer for text operations.

        Returns:
            Tuple of (resolved_field_name, use_wildcard_conversion).
        """
        # Intelligent mappings take precedence: they can select a sub-field per
        # operator and decide whether wildcard conversion is needed.
        if field_name in self.intelligent_mappings:
            field_mapping = self.intelligent_mappings[field_name]
            resolved_field = field_mapping.get_field_for_operator(operator, preferred_analyzer)
            use_wildcard = field_mapping.needs_wildcard_conversion(operator, preferred_analyzer)
            # Fall back to the original name if the mapping resolved to nothing.
            if not resolved_field:
                resolved_field = field_name
            return resolved_field, use_wildcard

        # Simple rename mappings never trigger wildcard conversion.
        if field_name in self.simple_mappings:
            return self.simple_mappings[field_name], False

        # No mapping: use the field name as-is.
        return field_name, False

    def _convert_value(self, value: Any) -> Any:
        """Convert value types for Lucene compatibility.

        Recognizes the case-insensitive string forms ``true``/``false``/``null``
        and maps them to ``True``/``False``/``None``; everything else is
        returned unchanged.

        Args:
            value: Value to convert.

        Returns:
            Converted value (bool, None, or original).
        """
        if isinstance(value, str):
            lowered = value.lower()
            if lowered == "true":
                return True
            if lowered == "false":
                return False
            if lowered == "null":
                return None
        return value