tellaro_query_language-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
  2. tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
  3. tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
  4. tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
  5. tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
  6. tql/__init__.py +47 -0
  7. tql/analyzer.py +385 -0
  8. tql/cache/__init__.py +7 -0
  9. tql/cache/base.py +25 -0
  10. tql/cache/memory.py +63 -0
  11. tql/cache/redis.py +68 -0
  12. tql/core.py +929 -0
  13. tql/core_components/README.md +92 -0
  14. tql/core_components/__init__.py +20 -0
  15. tql/core_components/file_operations.py +113 -0
  16. tql/core_components/opensearch_operations.py +869 -0
  17. tql/core_components/stats_operations.py +200 -0
  18. tql/core_components/validation_operations.py +599 -0
  19. tql/evaluator.py +379 -0
  20. tql/evaluator_components/README.md +131 -0
  21. tql/evaluator_components/__init__.py +17 -0
  22. tql/evaluator_components/field_access.py +176 -0
  23. tql/evaluator_components/special_expressions.py +296 -0
  24. tql/evaluator_components/value_comparison.py +315 -0
  25. tql/exceptions.py +160 -0
  26. tql/geoip_normalizer.py +233 -0
  27. tql/mutator_analyzer.py +830 -0
  28. tql/mutators/__init__.py +222 -0
  29. tql/mutators/base.py +78 -0
  30. tql/mutators/dns.py +316 -0
  31. tql/mutators/encoding.py +218 -0
  32. tql/mutators/geo.py +363 -0
  33. tql/mutators/list.py +212 -0
  34. tql/mutators/network.py +163 -0
  35. tql/mutators/security.py +225 -0
  36. tql/mutators/string.py +165 -0
  37. tql/opensearch.py +78 -0
  38. tql/opensearch_components/README.md +130 -0
  39. tql/opensearch_components/__init__.py +17 -0
  40. tql/opensearch_components/field_mapping.py +399 -0
  41. tql/opensearch_components/lucene_converter.py +305 -0
  42. tql/opensearch_components/query_converter.py +775 -0
  43. tql/opensearch_mappings.py +309 -0
  44. tql/opensearch_stats.py +451 -0
  45. tql/parser.py +1363 -0
  46. tql/parser_components/README.md +72 -0
  47. tql/parser_components/__init__.py +20 -0
  48. tql/parser_components/ast_builder.py +162 -0
  49. tql/parser_components/error_analyzer.py +101 -0
  50. tql/parser_components/field_extractor.py +112 -0
  51. tql/parser_components/grammar.py +473 -0
  52. tql/post_processor.py +737 -0
  53. tql/scripts.py +124 -0
  54. tql/stats_evaluator.py +444 -0
  55. tql/stats_transformer.py +184 -0
  56. tql/validators.py +110 -0
tql/opensearch_components/lucene_converter.py
@@ -0,0 +1,305 @@
+"""Lucene query string converter for OpenSearch backend.
+
+This module handles conversion of TQL AST to Lucene query strings.
+"""
+
+from typing import Any, Dict, Optional, Tuple
+
+from ..exceptions import TQLUnsupportedOperationError, TQLValidationError
+from .field_mapping import FieldMapping
+
+
+class LuceneConverter:
+    """Handles conversion of TQL AST to Lucene query strings."""
+
+    def __init__(self, field_mappings: Dict[str, FieldMapping], simple_mappings: Dict[str, str]):
+        """Initialize Lucene converter.
+
+        Args:
+            field_mappings: Intelligent field mappings
+            simple_mappings: Simple field name mappings
+        """
+        self.intelligent_mappings = field_mappings
+        self.simple_mappings = simple_mappings
+
+    def convert_lucene(self, ast: Dict[str, Any]) -> str:
+        """Convert a TQL AST to Lucene query string."""
+        return self._convert_node_to_lucene(ast)
+
+    def _convert_node_to_lucene(self, node: Any) -> str:
+        """Convert a single AST node to Lucene query string."""
+        if isinstance(node, dict):
+            node_type = node.get("type")
+
+            if node_type == "comparison":
+                return self._convert_comparison_to_lucene(node)
+            elif node_type == "logical_op":
+                return self._convert_logical_op_to_lucene(node)
+            elif node_type == "unary_op":
+                return self._convert_unary_op_to_lucene(node)
+            elif node_type == "collection_op":
+                return self._convert_collection_op_to_lucene(node)
+
+        raise TQLValidationError(f"Unknown node type: {node}")
+
+    def _convert_comparison_to_lucene(self, node: Dict[str, Any]) -> str:  # noqa: C901
+        """Convert a comparison operation to Lucene query string."""
+        field_name = node["field"]
+        operator = node["operator"]
+        value = node["value"]
+
+        # Get the actual field name to use (could be enhanced to extract analyzer from query context)
+        lucene_field, use_wildcard = self._resolve_field_name(field_name, operator)
+
+        # Escape special characters in values
+        if isinstance(value, str):
+            escaped_value = self._escape_lucene_value(value)
+        else:
+            escaped_value = str(value)
+
+        # Handle special wildcard conversion for keyword fields
+        if use_wildcard and operator == "contains":
+            return f"{lucene_field}:*{escaped_value}*"
+
+        # Convert operator to Lucene syntax
+        if operator in ["eq", "="]:
+            return f"{lucene_field}:{escaped_value}"
+        elif operator in ["ne", "!="]:
+            return f"NOT {lucene_field}:{escaped_value}"
+        elif operator in ["gt", ">"]:
+            return f"{lucene_field}:>{escaped_value}"
+        elif operator in ["gte", ">="]:
+            return f"{lucene_field}:>={escaped_value}"
+        elif operator in ["lt", "<"]:
+            return f"{lucene_field}:<{escaped_value}"
+        elif operator in ["lte", "<="]:
+            return f"{lucene_field}:<={escaped_value}"
+        elif operator == "contains":
+            if use_wildcard:
+                return f"{lucene_field}:*{escaped_value}*"
+            else:
+                # For text fields, use quoted phrase
+                return f'{lucene_field}:"{escaped_value}"'
+        elif operator == "startswith":
+            return f"{lucene_field}:{escaped_value}*"
+        elif operator == "endswith":
+            return f"{lucene_field}:*{escaped_value}"
+        elif operator == "in":
+            if isinstance(value, list):
+                escaped_values = [self._escape_lucene_value(str(v)) for v in value]
+                return f"{lucene_field}:({' OR '.join(escaped_values)})"
+            else:
+                return f"{lucene_field}:{escaped_value}"
+        elif operator == "regexp":
+            return f"{lucene_field}:/{escaped_value}/"
+        elif operator == "exists":
+            return f"_exists_:{lucene_field}"
+        elif operator == "is":
+            if value is None:
+                return f"NOT _exists_:{lucene_field}"
+            else:
+                return f"{lucene_field}:{escaped_value}"
+        elif operator == "between":
+            if isinstance(value, list) and len(value) == 2:
+                # Convert values to appropriate types
+                val1 = self._convert_value(value[0])
+                val2 = self._convert_value(value[1])
+
+                # Allow values in any order
+                lower = (
+                    min(val1, val2) if isinstance(val1, (int, float)) and isinstance(val2, (int, float)) else value[0]
+                )
+                upper = (
+                    max(val1, val2) if isinstance(val1, (int, float)) and isinstance(val2, (int, float)) else value[1]
+                )
+
+                # For non-numeric values (like dates), we use the original order if we can't determine min/max
+                if not isinstance(val1, (int, float)) or not isinstance(val2, (int, float)):
+                    try:
+                        # If values can be compared (like strings), try to determine order
+                        if val1 > val2:
+                            lower, upper = val2, val1
+                        else:
+                            lower, upper = val1, val2
+                    except TypeError:
+                        # If comparison fails, use the original order
+                        lower, upper = value[0], value[1]
+
+                return f"{lucene_field}:[{lower} TO {upper}]"
+            else:
+                raise TQLValidationError(f"Between operator requires a list with two values, got: {value}")
+        elif operator == "cidr":
+            return f"{lucene_field}:{escaped_value}"
+        else:
+            raise TQLUnsupportedOperationError(f"Operator '{operator}' not supported for Lucene")
+
+    def _convert_logical_op_to_lucene(self, node: Dict[str, Any]) -> str:
+        """Convert a logical operation to Lucene query string."""
+        operator = node["operator"]
+        left_query = self._convert_node_to_lucene(node["left"])
+        right_query = self._convert_node_to_lucene(node["right"])
+
+        if operator == "and":
+            return f"({left_query}) AND ({right_query})"
+        elif operator == "or":
+            return f"({left_query}) OR ({right_query})"
+        else:
+            raise TQLUnsupportedOperationError(f"Logical operator '{operator}' not supported for Lucene")
+
+    def _convert_unary_op_to_lucene(self, node: Dict[str, Any]) -> str:
+        """Convert a unary operation to Lucene query string."""
+        operator = node["operator"]
+        operand_query = self._convert_node_to_lucene(node["operand"])
+
+        if operator == "not":
+            return f"NOT ({operand_query})"
+        else:
+            raise TQLUnsupportedOperationError(f"Unary operator '{operator}' not supported for Lucene")
+
+    def _convert_collection_op_to_lucene(self, node: Dict[str, Any]) -> str:  # noqa: C901
+        """Convert a collection operation to Lucene query string."""
+        operator = node["operator"]
+        field_name = node["field"]
+        comparison_operator = node["comparison_operator"]
+        value = node["value"]
+
+        # Get the actual field name to use
+        lucene_field, use_wildcard = self._resolve_field_name(field_name, comparison_operator)
+
+        # Convert value
+        if isinstance(value, str):
+            escaped_value = self._escape_lucene_value(value)
+        else:
+            escaped_value = str(value)
+
+        # Build the appropriate comparison based on the operator
+        if comparison_operator in ["eq", "="]:
+            comparison = f"{lucene_field}:{escaped_value}"
+        elif comparison_operator in ["ne", "!="]:
+            comparison = f"NOT {lucene_field}:{escaped_value}"
+        elif comparison_operator in ["gt", ">"]:
+            comparison = f"{lucene_field}:>{escaped_value}"
+        elif comparison_operator in ["gte", ">="]:
+            comparison = f"{lucene_field}:>={escaped_value}"
+        elif comparison_operator in ["lt", "<"]:
+            comparison = f"{lucene_field}:<{escaped_value}"
+        elif comparison_operator in ["lte", "<="]:
+            comparison = f"{lucene_field}:<={escaped_value}"
+        elif comparison_operator == "contains":
+            if use_wildcard:
+                comparison = f"{lucene_field}:*{escaped_value}*"
+            else:
+                comparison = f'{lucene_field}:"{escaped_value}"'
+        elif comparison_operator == "startswith":
+            comparison = f"{lucene_field}:{escaped_value}*"
+        elif comparison_operator == "endswith":
+            comparison = f"{lucene_field}:*{escaped_value}"
+        elif comparison_operator == "regexp":
+            comparison = f"{lucene_field}:/{escaped_value}/"
+        elif comparison_operator == "in":
+            if isinstance(value, list):
+                escaped_values = [self._escape_lucene_value(str(v)) for v in value]
+                comparison = f"{lucene_field}:({' OR '.join(escaped_values)})"
+            else:
+                comparison = f"{lucene_field}:{escaped_value}"
+        else:
+            raise TQLUnsupportedOperationError(
+                f"Operator '{comparison_operator}' not supported for collection operators in Lucene"
+            )
+
+        # For ANY, this is straightforward - we're checking if any element matches
+        if operator == "any":
+            return comparison
+        # For ALL, we need to negate the negated comparison
+        elif operator == "all":
+            # Not(Not(comparison)) is semantically equivalent to requiring ALL elements match
+            return f"NOT (_exists_:{lucene_field} AND NOT ({comparison}))"
+        else:
+            raise TQLUnsupportedOperationError(f"Collection operator '{operator}' not supported for Lucene")
+
+    def _escape_lucene_value(self, value: str) -> str:
+        """Escape special characters in Lucene query values."""
+        # Lucene special characters: + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ /
+        special_chars = [
+            "+",
+            "-",
+            "=",
+            "&",
+            "|",
+            ">",
+            "<",
+            "!",
+            "(",
+            ")",
+            "{",
+            "}",
+            "[",
+            "]",
+            "^",
+            '"',
+            "~",
+            "*",
+            "?",
+            ":",
+            "\\",
+            "/",
+        ]
+
+        escaped = value
+        for char in special_chars:
+            escaped = escaped.replace(char, f"\\{char}")
+
+        # Quote the value if it contains spaces
+        if " " in escaped:
+            escaped = f'"{escaped}"'
+
+        return escaped
+
+    def _resolve_field_name(
+        self, field_name: str, operator: str, preferred_analyzer: Optional[str] = None
+    ) -> Tuple[str, bool]:
+        """Resolve field name based on mappings and operator.
+
+        Args:
+            field_name: The TQL field name
+            operator: The operator being used
+            preferred_analyzer: Preferred analyzer for text operations
+
+        Returns:
+            Tuple of (resolved_field_name, use_wildcard_conversion)
+        """
+        # Check intelligent mappings first
+        if field_name in self.intelligent_mappings:
+            field_mapping = self.intelligent_mappings[field_name]
+            resolved_field = field_mapping.get_field_for_operator(operator, preferred_analyzer)
+            use_wildcard = field_mapping.needs_wildcard_conversion(operator, preferred_analyzer)
+            # If resolved field is empty, use the original field name
+            if not resolved_field:
+                resolved_field = field_name
+            return resolved_field, use_wildcard
+
+        # Check simple mappings
+        elif field_name in self.simple_mappings:
+            return self.simple_mappings[field_name], False
+
+        # No mapping, use field name as-is
+        else:
+            return field_name, False
+
+    def _convert_value(self, value: Any) -> Any:
+        """Convert value types for Lucene compatibility.
+
+        Args:
+            value: Value to convert
+
+        Returns:
+            Converted value (bool, None, or original)
+        """
+        if isinstance(value, str):
+            if value.lower() == "true":
+                return True
+            elif value.lower() == "false":
+                return False
+            elif value.lower() == "null":
+                return None
+        return value
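
For reference, here is a minimal usage sketch of the converter above. It is not part of the package: it assumes empty field mappings (so field names pass through _resolve_field_name unchanged) and hand-built AST dictionaries whose keys mirror what _convert_node_to_lucene expects; the AST actually produced by tql/parser.py may differ in detail, and the expected outputs in the comments are simply what the logic above should emit for these inputs.

    from tql.opensearch_components.lucene_converter import LuceneConverter

    # With no intelligent or simple mappings, _resolve_field_name returns
    # the field name as-is and disables wildcard conversion.
    converter = LuceneConverter(field_mappings={}, simple_mappings={})

    # Simple comparison node: source.ip eq "10.0.0.5"
    eq_node = {"type": "comparison", "field": "source.ip", "operator": "eq", "value": "10.0.0.5"}
    print(converter.convert_lucene(eq_node))
    # -> source.ip:10.0.0.5

    # Logical AND of two comparisons; each side is parenthesized.
    and_node = {
        "type": "logical_op",
        "operator": "and",
        "left": eq_node,
        "right": {"type": "comparison", "field": "destination.port", "operator": "gte", "value": 1024},
    }
    print(converter.convert_lucene(and_node))
    # -> (source.ip:10.0.0.5) AND (destination.port:>=1024)

    # Collection operator: ALL elements of "tags" equal "prod", expressed by
    # excluding documents where the field exists and some element does not match.
    all_node = {
        "type": "collection_op",
        "operator": "all",
        "field": "tags",
        "comparison_operator": "eq",
        "value": "prod",
    }
    print(converter.convert_lucene(all_node))
    # -> NOT (_exists_:tags AND NOT (tags:prod))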