tellaro-query-language 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
- tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
- tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
- tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
- tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
- tql/__init__.py +47 -0
- tql/analyzer.py +385 -0
- tql/cache/__init__.py +7 -0
- tql/cache/base.py +25 -0
- tql/cache/memory.py +63 -0
- tql/cache/redis.py +68 -0
- tql/core.py +929 -0
- tql/core_components/README.md +92 -0
- tql/core_components/__init__.py +20 -0
- tql/core_components/file_operations.py +113 -0
- tql/core_components/opensearch_operations.py +869 -0
- tql/core_components/stats_operations.py +200 -0
- tql/core_components/validation_operations.py +599 -0
- tql/evaluator.py +379 -0
- tql/evaluator_components/README.md +131 -0
- tql/evaluator_components/__init__.py +17 -0
- tql/evaluator_components/field_access.py +176 -0
- tql/evaluator_components/special_expressions.py +296 -0
- tql/evaluator_components/value_comparison.py +315 -0
- tql/exceptions.py +160 -0
- tql/geoip_normalizer.py +233 -0
- tql/mutator_analyzer.py +830 -0
- tql/mutators/__init__.py +222 -0
- tql/mutators/base.py +78 -0
- tql/mutators/dns.py +316 -0
- tql/mutators/encoding.py +218 -0
- tql/mutators/geo.py +363 -0
- tql/mutators/list.py +212 -0
- tql/mutators/network.py +163 -0
- tql/mutators/security.py +225 -0
- tql/mutators/string.py +165 -0
- tql/opensearch.py +78 -0
- tql/opensearch_components/README.md +130 -0
- tql/opensearch_components/__init__.py +17 -0
- tql/opensearch_components/field_mapping.py +399 -0
- tql/opensearch_components/lucene_converter.py +305 -0
- tql/opensearch_components/query_converter.py +775 -0
- tql/opensearch_mappings.py +309 -0
- tql/opensearch_stats.py +451 -0
- tql/parser.py +1363 -0
- tql/parser_components/README.md +72 -0
- tql/parser_components/__init__.py +20 -0
- tql/parser_components/ast_builder.py +162 -0
- tql/parser_components/error_analyzer.py +101 -0
- tql/parser_components/field_extractor.py +112 -0
- tql/parser_components/grammar.py +473 -0
- tql/post_processor.py +737 -0
- tql/scripts.py +124 -0
- tql/stats_evaluator.py +444 -0
- tql/stats_transformer.py +184 -0
- tql/validators.py +110 -0
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
"""Field mapping support for OpenSearch backend.
|
|
2
|
+
|
|
3
|
+
This module provides the FieldMapping class for intelligent field selection
|
|
4
|
+
based on operators and field types in OpenSearch.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any, Dict, Optional
|
|
8
|
+
|
|
9
|
+
from ..exceptions import TQLTypeError, TQLUnsupportedOperationError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class FieldMapping:
    """Represents field mapping information for intelligent field selection.

    Given an OpenSearch-style or flat mapping description for one logical
    field, this class records which concrete sub-fields are keyword, text
    (per analyzer), or other types, and selects the best concrete field
    for a given TQL operator.
    """

    def __init__(self, mapping_info: Dict[str, Any]):  # noqa: C901
        """Initialize field mapping.

        Args:
            mapping_info: Dictionary containing field mapping information.
                Supports multiple formats:

                1. OpenSearch-style mapping with subfields:
                   {
                       "type": "text",
                       "fields": {
                           "keyword": {"type": "keyword"},
                           "english": {"type": "text", "analyzer": "english"}
                       }
                   }

                2. Flat format with field variants:
                   {
                       "field_name": "keyword",
                       "field_name.text": {"type": "text", "analyzer": "standard"},
                       "field_name.english": {"type": "text", "analyzer": "english"}
                   }

                3. Legacy format:
                   {
                       "field_name": "keyword",
                       "field_name.text": "text",
                       "analyzer": "standard"
                   }
        """
        self.mappings = mapping_info
        # Concrete field path to use for exact (keyword) matching, if any.
        self.keyword_field: Optional[str] = None
        # analyzer name -> concrete text field path
        self.text_fields: Dict[str, str] = {}
        self.default_analyzer: str = mapping_info.get("analyzer", "standard")
        # concrete field path -> mapped type, for every field seen
        self.field_types: Dict[str, Any] = {}
        # The main field name without suffixes; "" is a placeholder for
        # OpenSearch-style mappings until set_base_field_name() is called.
        self.base_field_name: Optional[str] = None

        # OpenSearch-style single-field mapping: only "type"/"fields"/"analyzer" keys.
        is_opensearch_style = "type" in mapping_info and not any(
            k for k in mapping_info.keys() if k not in ["type", "fields", "analyzer"]
        )
        if is_opensearch_style:
            self._parse_opensearch_style(mapping_info)
        else:
            self._parse_flat_style(mapping_info)

    def _parse_opensearch_style(self, mapping_info: Dict[str, Any]) -> None:
        """Parse an OpenSearch-style mapping ({"type": ..., "fields": {...}}).

        The base field name is not known at this point (it is supplied later
        by the backend via set_base_field_name), so "" is used as a placeholder
        and subfield paths are recorded as ".suffix".
        """
        base_type = mapping_info["type"]
        subfields = mapping_info.get("fields", {})
        self.base_field_name = ""  # placeholder; fixed up by set_base_field_name()

        # Classify the base field itself.
        if base_type == "keyword":
            self.keyword_field = self.base_field_name
            self.field_types[self.base_field_name] = "keyword"
        elif base_type == "text":
            analyzer = mapping_info.get("analyzer", "standard")
            self.text_fields[analyzer] = self.base_field_name
            self.field_types[self.base_field_name] = "text"
        else:
            self.field_types[self.base_field_name] = base_type

        # Classify each declared subfield (e.g. ".keyword", ".english").
        for subfield_name, subfield_config in subfields.items():
            if not isinstance(subfield_config, dict):
                continue
            subfield_type = subfield_config.get("type")
            field_path = (
                f"{self.base_field_name}.{subfield_name}" if self.base_field_name else f".{subfield_name}"
            )

            if subfield_type == "keyword":
                self.keyword_field = field_path
                self.field_types[field_path] = "keyword"
            elif subfield_type == "text":
                analyzer = subfield_config.get("analyzer", "standard")
                self.text_fields[analyzer] = field_path
                self.field_types[field_path] = "text"
            elif subfield_type:
                self.field_types[field_path] = subfield_type

    def _parse_flat_style(self, mapping_info: Dict[str, Any]) -> None:
        """Parse the flat/legacy formats: {"name": type_or_config, ...}."""
        for field_name, field_config in mapping_info.items():
            if field_name == "analyzer":
                continue

            # Derive the base field name (part before any ".suffix") from the
            # first non-underscore key seen.
            if not self.base_field_name and not field_name.startswith("_"):
                self.base_field_name = field_name.split(".")[0]

            if isinstance(field_config, dict):
                # New format: {"type": "text", "analyzer": "english"}
                field_type = field_config.get("type")
                analyzer = field_config.get("analyzer", "standard")

                if field_type:
                    self.field_types[field_name] = field_type

                if field_type == "keyword":
                    self.keyword_field = field_name
                elif field_type == "text":
                    self.text_fields[analyzer] = field_name
            else:
                # Legacy format: bare type string ("keyword", "text", ...)
                field_type = field_config
                self.field_types[field_name] = field_type

                if field_type == "keyword":
                    self.keyword_field = field_name
                elif field_type == "text":
                    # Legacy text fields use the mapping-level default analyzer.
                    self.text_fields[self.default_analyzer] = field_name

    def set_base_field_name(self, base_field_name: str) -> None:  # noqa: C901
        """Set the base field name and update field paths for OpenSearch-style mappings.

        Only applies when the base name is still the "" placeholder produced by
        _parse_opensearch_style(); flat-style mappings already carry full paths.

        Args:
            base_field_name: The base field name to use
        """
        if self.base_field_name != "":  # Only update if it was a placeholder
            return

        old_base = self.base_field_name  # always "" here
        self.base_field_name = base_field_name

        # Rewrite every recorded field path: "" -> base, ".x" -> "base.x".
        new_field_types = {}
        for field_path, field_type in self.field_types.items():
            if field_path == old_base:
                new_field_types[base_field_name] = field_type
            elif field_path.startswith("."):
                new_field_types[f"{base_field_name}{field_path}"] = field_type
            else:
                new_field_types[field_path] = field_type
        self.field_types = new_field_types

        # Rewrite the keyword field path the same way.
        if self.keyword_field == old_base:
            self.keyword_field = base_field_name
        elif self.keyword_field is not None and self.keyword_field.startswith("."):
            self.keyword_field = f"{base_field_name}{self.keyword_field}"
        elif self.keyword_field == "":
            # Keyword field was the bare base type; point it at the base name.
            self.keyword_field = base_field_name

        # Rewrite text field paths per analyzer.
        new_text_fields = {}
        for analyzer, field_path in self.text_fields.items():
            if field_path == old_base:
                new_text_fields[analyzer] = base_field_name
            elif field_path.startswith("."):
                new_text_fields[analyzer] = f"{base_field_name}{field_path}"
            else:
                new_text_fields[analyzer] = field_path
        self.text_fields = new_text_fields

    def get_text_field_for_analyzer(self, preferred_analyzer: Optional[str] = None) -> Optional[str]:
        """Get the best text field for the given analyzer preference.

        Preference order: exact analyzer match, the mapping's default analyzer,
        the "standard" analyzer, then any available text field.

        Args:
            preferred_analyzer: Preferred analyzer (e.g., 'english', 'autocomplete')

        Returns:
            Field name for the best matching text field, or None if no text fields
        """
        if not self.text_fields:
            return None

        if preferred_analyzer and preferred_analyzer in self.text_fields:
            return self.text_fields[preferred_analyzer]

        if self.default_analyzer in self.text_fields:
            return self.text_fields[self.default_analyzer]

        if "standard" in self.text_fields:
            return self.text_fields["standard"]

        # No analyzer preference could be satisfied; return any text field.
        return next(iter(self.text_fields.values()))

    def get_field_for_operator(self, operator: str, preferred_analyzer: Optional[str] = None) -> str:  # noqa: C901
        """Get the appropriate field name for the given operator.

        Args:
            operator: The TQL operator being used
            preferred_analyzer: Preferred analyzer for text operations

        Returns:
            The field name to use

        Raises:
            TQLTypeError: If a range operator is used and only text fields exist
            TQLUnsupportedOperationError: If operator is not supported for available fields
        """
        # Operators that work best with keyword fields (exact matching)
        keyword_operators = {
            "eq",
            "=",
            "ne",
            "!=",
            "in",
            "not_in",
            "exists",
            "not_exists",
            "is",
            "any",
            "all",
            "not_any",
            "not_all",
        }

        # Operators that work best with text fields (full-text search)
        text_operators = {"contains", "regexp", "not_regexp"}

        # Operators that require numeric/date fields
        range_operators = {">", ">=", "<", "<=", "gt", "gte", "lt", "lte", "between", "not_between"}

        # Operators that work with both but prefer keyword
        wildcard_operators = {"startswith", "endswith"}

        if operator in keyword_operators:
            if self.keyword_field:
                return self.keyword_field
            # No keyword field: numeric/IP/boolean/date fields also support equality.
            for field_name, field_type in self.field_types.items():
                if field_type in {"integer", "long", "float", "double", "boolean", "date", "ip"}:
                    return field_name
            # Fallback to any available text field.
            text_field = self.get_text_field_for_analyzer(preferred_analyzer)
            if text_field:
                return text_field
            # Fields exist but match none of the categories above: first non-empty name.
            if self.field_types:
                for field_name in self.field_types.keys():
                    if field_name:  # Skip empty placeholder keys
                        return field_name
            if self.base_field_name:
                return self.base_field_name
            # Last resort - return empty string
            return ""
        elif operator in text_operators:
            text_field = self.get_text_field_for_analyzer(preferred_analyzer)
            if text_field:
                return text_field
            elif self.keyword_field:
                # Keyword-only field: caller handles wildcard conversion
                # (see needs_wildcard_conversion).
                return self.keyword_field
        elif operator in wildcard_operators:
            # Prefer keyword for wildcard operations.
            if self.keyword_field:
                return self.keyword_field
            text_field = self.get_text_field_for_analyzer(preferred_analyzer)
            if text_field:
                return text_field
        elif operator in range_operators:
            # Prefer numeric/date fields; OpenSearch also supports range queries
            # on keyword fields (lexicographic), so fall back to keyword.
            for field_name, field_type in self.field_types.items():
                if field_type in {"integer", "long", "float", "double", "date"}:
                    return field_name

            if self.keyword_field:
                return self.keyword_field

            # Only text fields available - range queries won't work.
            if self.text_fields:
                field_name = self.base_field_name or "field"
                raise TQLTypeError(
                    field=field_name,
                    field_type="text",
                    operator=operator,
                    valid_operators=["=", "!=", "contains", "startswith", "endswith"],
                )
        elif operator in {"cidr", "not_cidr"}:
            # CIDR works best with the IP field type.
            for field_name, field_type in self.field_types.items():
                if field_type == "ip":
                    return field_name

            # Fallback to keyword field.
            if self.keyword_field:
                return self.keyword_field
            raise TQLUnsupportedOperationError("CIDR operator requires keyword or IP field type")

        # If we get here, no suitable field was found.
        available_types = []
        if self.keyword_field:
            available_types.append(f"{self.keyword_field}(keyword)")
        for analyzer, field_name in self.text_fields.items():
            available_types.append(f"{field_name}(text:{analyzer})")

        raise TQLUnsupportedOperationError(
            f"Operator '{operator}' is not supported for available field types: {available_types}"
        )

    def needs_wildcard_conversion(self, operator: str, preferred_analyzer: Optional[str] = None) -> bool:
        """Check if operator needs wildcard conversion for keyword fields.

        True when a full-text operator ("contains") was resolved to a keyword
        field because no text fields exist, so the backend must emit a
        wildcard query instead of a match query.

        Args:
            operator: The TQL operator
            preferred_analyzer: Preferred analyzer for text operations

        Returns:
            True if wildcard conversion is needed
        """
        text_operators = {"contains"}
        selected_field = self.get_field_for_operator(operator, preferred_analyzer)

        return operator in text_operators and selected_field == self.keyword_field and not self.text_fields

    def validate_operator_for_field_type(self, operator: str, raise_on_error: bool = True) -> bool:
        """Validate if an operator is compatible with available field types.

        Args:
            operator: The TQL operator to validate
            raise_on_error: If True, raise TQLTypeError on incompatibility

        Returns:
            True if operator is compatible, False otherwise

        Raises:
            TQLTypeError: If operator is incompatible and raise_on_error is True
        """
        numeric_types = {"integer", "long", "float", "double"}
        range_operators = {">", ">=", "<", "<=", "gt", "gte", "lt", "lte", "between", "not_between"}

        if operator in range_operators:
            has_numeric = any(ft in numeric_types for ft in self.field_types.values())
            has_keyword = self.keyword_field is not None

            # Range operators work best on numeric fields, but OpenSearch also
            # supports them on keyword fields; only text-only mappings fail.
            if not has_numeric and not has_keyword:
                if self.text_fields and raise_on_error:
                    field_name = self.base_field_name or "field"
                    raise TQLTypeError(
                        field=field_name,
                        field_type="text",
                        operator=operator,
                        valid_operators=["=", "!=", "contains", "startswith", "endswith"],
                    )
                elif not raise_on_error:
                    return False

        # CIDR operators require an IP or keyword field.
        # BUGFIX: previously only "cidr" was checked, so "not_cidr" bypassed
        # validation even though get_field_for_operator rejects it the same way.
        if operator in {"cidr", "not_cidr"}:
            has_ip = any(ft == "ip" for ft in self.field_types.values())
            has_keyword = self.keyword_field is not None

            if not has_ip and not has_keyword:
                if raise_on_error:
                    field_name = self.base_field_name or "field"
                    field_type = next(iter(self.field_types.values()), "unknown")
                    raise TQLTypeError(
                        field=field_name,
                        field_type=field_type,
                        operator=operator,
                        valid_operators=["=", "!=", "contains"] if field_type == "text" else [],
                    )
                return False

        return True