tellaro-query-language 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
- tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
- tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
- tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
- tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
- tql/__init__.py +47 -0
- tql/analyzer.py +385 -0
- tql/cache/__init__.py +7 -0
- tql/cache/base.py +25 -0
- tql/cache/memory.py +63 -0
- tql/cache/redis.py +68 -0
- tql/core.py +929 -0
- tql/core_components/README.md +92 -0
- tql/core_components/__init__.py +20 -0
- tql/core_components/file_operations.py +113 -0
- tql/core_components/opensearch_operations.py +869 -0
- tql/core_components/stats_operations.py +200 -0
- tql/core_components/validation_operations.py +599 -0
- tql/evaluator.py +379 -0
- tql/evaluator_components/README.md +131 -0
- tql/evaluator_components/__init__.py +17 -0
- tql/evaluator_components/field_access.py +176 -0
- tql/evaluator_components/special_expressions.py +296 -0
- tql/evaluator_components/value_comparison.py +315 -0
- tql/exceptions.py +160 -0
- tql/geoip_normalizer.py +233 -0
- tql/mutator_analyzer.py +830 -0
- tql/mutators/__init__.py +222 -0
- tql/mutators/base.py +78 -0
- tql/mutators/dns.py +316 -0
- tql/mutators/encoding.py +218 -0
- tql/mutators/geo.py +363 -0
- tql/mutators/list.py +212 -0
- tql/mutators/network.py +163 -0
- tql/mutators/security.py +225 -0
- tql/mutators/string.py +165 -0
- tql/opensearch.py +78 -0
- tql/opensearch_components/README.md +130 -0
- tql/opensearch_components/__init__.py +17 -0
- tql/opensearch_components/field_mapping.py +399 -0
- tql/opensearch_components/lucene_converter.py +305 -0
- tql/opensearch_components/query_converter.py +775 -0
- tql/opensearch_mappings.py +309 -0
- tql/opensearch_stats.py +451 -0
- tql/parser.py +1363 -0
- tql/parser_components/README.md +72 -0
- tql/parser_components/__init__.py +20 -0
- tql/parser_components/ast_builder.py +162 -0
- tql/parser_components/error_analyzer.py +101 -0
- tql/parser_components/field_extractor.py +112 -0
- tql/parser_components/grammar.py +473 -0
- tql/post_processor.py +737 -0
- tql/scripts.py +124 -0
- tql/stats_evaluator.py +444 -0
- tql/stats_transformer.py +184 -0
- tql/validators.py +110 -0
|
@@ -0,0 +1,599 @@
|
|
|
1
|
+
"""Validation operations for TQL.
|
|
2
|
+
|
|
3
|
+
This module handles query validation, type checking, and performance analysis.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
from ..exceptions import TQLFieldError, TQLTypeError, TQLValidationError
|
|
9
|
+
from ..parser import TQLParser
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ValidationOperations:
|
|
13
|
+
"""Handles validation operations for TQL."""
|
|
14
|
+
|
|
15
|
+
def __init__(self, parser: TQLParser, field_mappings: Dict[str, Any]):
    """Set up validation operations.

    Args:
        parser: TQL parser instance used to produce query ASTs.
        field_mappings: Field mapping configuration keyed by field name.
    """
    # Store collaborators; no validation happens until validate() is called.
    self.field_mappings = field_mappings
    self.parser = parser
|
|
24
|
+
|
|
25
|
+
def validate(self, query: str, validate_fields: bool = False) -> bool:
    """Validate a TQL query for syntax and optionally field names.

    The query string is first parsed (syntax errors raise from the parser),
    field names are then optionally checked against the configured mappings,
    and finally operator/type compatibility is always verified.

    Args:
        query: TQL query string
        validate_fields: Whether to validate field names against mappings

    Returns:
        True if query is valid

    Raises:
        Various TQL exceptions if validation fails
    """
    ast = self.parser.parse(query)

    # Field-name validation is opt-in because mappings may be incomplete.
    if validate_fields:
        self._validate_fields_in_ast(ast)

    # Type compatibility is checked unconditionally.
    self._check_type_compatibility(ast)
    return True
|
|
49
|
+
|
|
50
|
+
def _validate_fields_in_ast(self, ast: Dict[str, Any]) -> None:
|
|
51
|
+
"""Recursively validate field names in AST against field mappings."""
|
|
52
|
+
if isinstance(ast, dict):
|
|
53
|
+
if ast.get("type") == "comparison" and "field" in ast:
|
|
54
|
+
field = ast["field"]
|
|
55
|
+
if field not in self.field_mappings:
|
|
56
|
+
raise TQLFieldError(field=field, available_fields=sorted(self.field_mappings.keys()))
|
|
57
|
+
elif ast.get("type") == "logical_op":
|
|
58
|
+
left = ast.get("left")
|
|
59
|
+
right = ast.get("right")
|
|
60
|
+
if left:
|
|
61
|
+
self._validate_fields_in_ast(left)
|
|
62
|
+
if right:
|
|
63
|
+
self._validate_fields_in_ast(right)
|
|
64
|
+
elif ast.get("type") == "unary_op":
|
|
65
|
+
operand = ast.get("operand")
|
|
66
|
+
if operand:
|
|
67
|
+
self._validate_fields_in_ast(operand)
|
|
68
|
+
|
|
69
|
+
def _check_type_compatibility(self, ast: Dict[str, Any]) -> None: # noqa: C901
|
|
70
|
+
"""Check type compatibility between fields, operators, and values.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
ast: The AST to validate
|
|
74
|
+
|
|
75
|
+
Raises:
|
|
76
|
+
TQLTypeError: If type incompatibilities are found
|
|
77
|
+
TQLOperatorError: If operators are used incorrectly
|
|
78
|
+
"""
|
|
79
|
+
if not isinstance(ast, dict):
|
|
80
|
+
return
|
|
81
|
+
|
|
82
|
+
node_type = ast.get("type")
|
|
83
|
+
|
|
84
|
+
if node_type == "comparison":
|
|
85
|
+
field = ast.get("field")
|
|
86
|
+
operator = ast.get("operator")
|
|
87
|
+
value = ast.get("value")
|
|
88
|
+
type_hint = ast.get("type_hint")
|
|
89
|
+
|
|
90
|
+
# Only proceed if we have required fields
|
|
91
|
+
if not field or not operator:
|
|
92
|
+
return
|
|
93
|
+
|
|
94
|
+
# Handle multi-field scenarios first (for 'in' operator with field list)
|
|
95
|
+
if operator == "in" and isinstance(ast.get("original_fields"), list):
|
|
96
|
+
# This is a "value in [field1, field2, ...]" that was expanded
|
|
97
|
+
# Check each field's type
|
|
98
|
+
for check_field in ast.get("original_fields", [field]):
|
|
99
|
+
field_info = self.field_mappings.get(check_field, {})
|
|
100
|
+
if field_info:
|
|
101
|
+
self._validate_comparison_for_field(check_field, field_info, operator, value, type_hint)
|
|
102
|
+
return
|
|
103
|
+
|
|
104
|
+
# Get field type from mappings
|
|
105
|
+
field_info = self.field_mappings.get(field, {})
|
|
106
|
+
if field_info:
|
|
107
|
+
self._validate_comparison_for_field(field, field_info, operator, value, type_hint)
|
|
108
|
+
|
|
109
|
+
elif node_type == "logical_op":
|
|
110
|
+
left = ast.get("left")
|
|
111
|
+
right = ast.get("right")
|
|
112
|
+
if left:
|
|
113
|
+
self._check_type_compatibility(left)
|
|
114
|
+
if right:
|
|
115
|
+
self._check_type_compatibility(right)
|
|
116
|
+
|
|
117
|
+
elif node_type == "unary_op":
|
|
118
|
+
operand = ast.get("operand")
|
|
119
|
+
if operand:
|
|
120
|
+
self._check_type_compatibility(operand)
|
|
121
|
+
|
|
122
|
+
elif node_type == "query_with_stats":
|
|
123
|
+
if "filter" in ast:
|
|
124
|
+
self._check_type_compatibility(ast["filter"])
|
|
125
|
+
|
|
126
|
+
def _validate_comparison_for_field( # noqa: C901
|
|
127
|
+
self, field: str, field_info: Any, operator: str, value: Any, type_hint: Optional[str]
|
|
128
|
+
) -> None:
|
|
129
|
+
"""Validate a comparison for a specific field.
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
field: Field name
|
|
133
|
+
field_info: Field mapping information
|
|
134
|
+
operator: Comparison operator
|
|
135
|
+
value: Comparison value
|
|
136
|
+
type_hint: Optional type hint from query
|
|
137
|
+
"""
|
|
138
|
+
# Determine the field type
|
|
139
|
+
field_type: Optional[str] = None
|
|
140
|
+
if isinstance(field_info, str):
|
|
141
|
+
# Simple mapping: could be a field name or a type
|
|
142
|
+
if field_info in [
|
|
143
|
+
"keyword",
|
|
144
|
+
"text",
|
|
145
|
+
"long",
|
|
146
|
+
"integer",
|
|
147
|
+
"short",
|
|
148
|
+
"byte",
|
|
149
|
+
"double",
|
|
150
|
+
"float",
|
|
151
|
+
"boolean",
|
|
152
|
+
"date",
|
|
153
|
+
"ip",
|
|
154
|
+
]:
|
|
155
|
+
field_type = field_info
|
|
156
|
+
else:
|
|
157
|
+
# It's a mapped field name, we don't know the type
|
|
158
|
+
return
|
|
159
|
+
elif isinstance(field_info, dict):
|
|
160
|
+
# Complex mapping
|
|
161
|
+
field_type = field_info.get("type")
|
|
162
|
+
if not field_type:
|
|
163
|
+
# Check if it's an OpenSearch-style mapping with nested field definition
|
|
164
|
+
# e.g., {"message": {"type": "text", "analyzer": "standard"}}
|
|
165
|
+
if field in field_info and isinstance(field_info[field], dict):
|
|
166
|
+
field_type = field_info[field].get("type")
|
|
167
|
+
# Check if it's a multi-field mapping
|
|
168
|
+
elif "fields" in field_info:
|
|
169
|
+
# Multi-field, check the main field type
|
|
170
|
+
field_type = field_info.get("type", "text")
|
|
171
|
+
else:
|
|
172
|
+
# No type info available
|
|
173
|
+
return
|
|
174
|
+
else:
|
|
175
|
+
# No type info available
|
|
176
|
+
return
|
|
177
|
+
|
|
178
|
+
# Apply type hint override if provided
|
|
179
|
+
if type_hint and field_type:
|
|
180
|
+
self._validate_type_hint_compatibility(field, field_type, type_hint)
|
|
181
|
+
# Use the type hint for validation
|
|
182
|
+
validation_type = self._map_type_hint_to_es_type(type_hint)
|
|
183
|
+
else:
|
|
184
|
+
validation_type = field_type
|
|
185
|
+
|
|
186
|
+
# Validate based on field type
|
|
187
|
+
if validation_type:
|
|
188
|
+
self._validate_simple_type_compatibility(field, validation_type, operator)
|
|
189
|
+
|
|
190
|
+
# Additional validation for type mismatches
|
|
191
|
+
# Check if we're using comparison operators with incompatible value types
|
|
192
|
+
if validation_type in ["text", "keyword"] and operator in [">", "<", ">=", "<=", "gt", "lt", "gte", "lte"]:
|
|
193
|
+
# String comparison operators require string values
|
|
194
|
+
# Check if value looks like a number (parser returns all values as strings)
|
|
195
|
+
if isinstance(value, str) and value.replace(".", "", 1).replace("-", "", 1).isdigit():
|
|
196
|
+
raise TQLTypeError(
|
|
197
|
+
field=field,
|
|
198
|
+
field_type=validation_type,
|
|
199
|
+
operator=operator,
|
|
200
|
+
suggestions=["Use a string value for comparison, or use a numeric field"],
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
def _validate_type_hint_compatibility(self, field: str, field_type: str, type_hint: str) -> None:
|
|
204
|
+
"""Validate that a type hint is compatible with the field's actual type."""
|
|
205
|
+
# Map type hints to ES types
|
|
206
|
+
hint_es_type = self._map_type_hint_to_es_type(type_hint)
|
|
207
|
+
|
|
208
|
+
# Check compatibility
|
|
209
|
+
compatible = False
|
|
210
|
+
|
|
211
|
+
# Numeric types can be cast between each other
|
|
212
|
+
numeric_types = {"long", "integer", "short", "byte", "double", "float"}
|
|
213
|
+
if field_type in numeric_types and hint_es_type in numeric_types:
|
|
214
|
+
compatible = True
|
|
215
|
+
# String types
|
|
216
|
+
elif field_type in {"text", "keyword"} and hint_es_type in {"text", "keyword"}:
|
|
217
|
+
compatible = True
|
|
218
|
+
# Boolean
|
|
219
|
+
elif field_type == "boolean" and hint_es_type == "boolean":
|
|
220
|
+
compatible = True
|
|
221
|
+
# Date
|
|
222
|
+
elif field_type == "date" and hint_es_type == "date":
|
|
223
|
+
compatible = True
|
|
224
|
+
# IP
|
|
225
|
+
elif field_type == "ip" and hint_es_type == "ip":
|
|
226
|
+
compatible = True
|
|
227
|
+
# Geo point
|
|
228
|
+
elif field_type == "geo_point" and hint_es_type == "geo_point":
|
|
229
|
+
compatible = True
|
|
230
|
+
# Object/nested
|
|
231
|
+
elif field_type in {"object", "nested"} and hint_es_type == "object":
|
|
232
|
+
compatible = True
|
|
233
|
+
|
|
234
|
+
if not compatible:
|
|
235
|
+
# Use TQLValidationError instead since TQLTypeError requires operator
|
|
236
|
+
raise TQLValidationError(
|
|
237
|
+
f"Type hint '{type_hint}' is incompatible with field '{field}' of type '{field_type}'"
|
|
238
|
+
)
|
|
239
|
+
|
|
240
|
+
def _map_type_hint_to_es_type(self, type_hint: str) -> str:
|
|
241
|
+
"""Map TQL type hints to Elasticsearch/OpenSearch types."""
|
|
242
|
+
mapping = {
|
|
243
|
+
"number": "double",
|
|
244
|
+
"int": "long",
|
|
245
|
+
"float": "double",
|
|
246
|
+
"decimal": "double",
|
|
247
|
+
"string": "keyword",
|
|
248
|
+
"text": "text",
|
|
249
|
+
"bool": "boolean",
|
|
250
|
+
"boolean": "boolean",
|
|
251
|
+
"date": "date",
|
|
252
|
+
"array": "keyword", # Arrays don't have a specific type
|
|
253
|
+
"geo": "geo_point",
|
|
254
|
+
"object": "object",
|
|
255
|
+
"ip": "ip",
|
|
256
|
+
}
|
|
257
|
+
return mapping.get(type_hint.lower(), "keyword")
|
|
258
|
+
|
|
259
|
+
def _validate_simple_type_compatibility(self, field: str, field_type: str, operator: str) -> None:
|
|
260
|
+
"""Validate operator compatibility with field type.
|
|
261
|
+
|
|
262
|
+
Args:
|
|
263
|
+
field: Field name for error messages
|
|
264
|
+
field_type: The field's data type
|
|
265
|
+
operator: The operator being used
|
|
266
|
+
|
|
267
|
+
Raises:
|
|
268
|
+
TQLOperatorError: If operator is incompatible with field type
|
|
269
|
+
"""
|
|
270
|
+
# Define operator compatibility by type
|
|
271
|
+
numeric_ops = {
|
|
272
|
+
"eq",
|
|
273
|
+
"=",
|
|
274
|
+
"ne",
|
|
275
|
+
"!=",
|
|
276
|
+
"gt",
|
|
277
|
+
">",
|
|
278
|
+
"gte",
|
|
279
|
+
">=",
|
|
280
|
+
"lt",
|
|
281
|
+
"<",
|
|
282
|
+
"lte",
|
|
283
|
+
"<=",
|
|
284
|
+
"between",
|
|
285
|
+
"not_between",
|
|
286
|
+
"in",
|
|
287
|
+
"not_in",
|
|
288
|
+
"exists",
|
|
289
|
+
"not_exists",
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
# Text fields (analyzed) should not use range operators
|
|
293
|
+
text_ops = {
|
|
294
|
+
"eq",
|
|
295
|
+
"=",
|
|
296
|
+
"ne",
|
|
297
|
+
"!=",
|
|
298
|
+
"contains",
|
|
299
|
+
"not_contains",
|
|
300
|
+
"startswith",
|
|
301
|
+
"not_startswith",
|
|
302
|
+
"endswith",
|
|
303
|
+
"not_endswith",
|
|
304
|
+
"regexp",
|
|
305
|
+
"not_regexp",
|
|
306
|
+
"in",
|
|
307
|
+
"not_in",
|
|
308
|
+
"exists",
|
|
309
|
+
"not_exists",
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
# Keyword fields can use range operators for lexicographic comparison
|
|
313
|
+
keyword_ops = {
|
|
314
|
+
"eq",
|
|
315
|
+
"=",
|
|
316
|
+
"ne",
|
|
317
|
+
"!=",
|
|
318
|
+
"contains",
|
|
319
|
+
"not_contains",
|
|
320
|
+
"startswith",
|
|
321
|
+
"not_startswith",
|
|
322
|
+
"endswith",
|
|
323
|
+
"not_endswith",
|
|
324
|
+
"regexp",
|
|
325
|
+
"not_regexp",
|
|
326
|
+
"in",
|
|
327
|
+
"not_in",
|
|
328
|
+
"exists",
|
|
329
|
+
"not_exists",
|
|
330
|
+
"gt",
|
|
331
|
+
">",
|
|
332
|
+
"gte",
|
|
333
|
+
">=",
|
|
334
|
+
"lt",
|
|
335
|
+
"<",
|
|
336
|
+
"lte",
|
|
337
|
+
"<=",
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
boolean_ops = {"eq", "=", "ne", "!=", "exists", "not_exists", "is", "is_not"}
|
|
341
|
+
|
|
342
|
+
date_ops = {
|
|
343
|
+
"eq",
|
|
344
|
+
"=",
|
|
345
|
+
"ne",
|
|
346
|
+
"!=",
|
|
347
|
+
"gt",
|
|
348
|
+
">",
|
|
349
|
+
"gte",
|
|
350
|
+
">=",
|
|
351
|
+
"lt",
|
|
352
|
+
"<",
|
|
353
|
+
"lte",
|
|
354
|
+
"<=",
|
|
355
|
+
"between",
|
|
356
|
+
"not_between",
|
|
357
|
+
"exists",
|
|
358
|
+
"not_exists",
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
ip_ops = {"eq", "=", "ne", "!=", "cidr", "not_cidr", "in", "not_in", "exists", "not_exists"}
|
|
362
|
+
|
|
363
|
+
# Determine allowed operators based on type
|
|
364
|
+
if field_type in ["long", "integer", "short", "byte", "double", "float"]:
|
|
365
|
+
allowed_ops = numeric_ops
|
|
366
|
+
# type_desc = "numeric" # Not used
|
|
367
|
+
elif field_type == "text":
|
|
368
|
+
allowed_ops = text_ops
|
|
369
|
+
# type_desc = "text" # Not used
|
|
370
|
+
elif field_type == "keyword":
|
|
371
|
+
allowed_ops = keyword_ops
|
|
372
|
+
# type_desc = "keyword" # Not used
|
|
373
|
+
elif field_type == "boolean":
|
|
374
|
+
allowed_ops = boolean_ops
|
|
375
|
+
# type_desc = "boolean" # Not used
|
|
376
|
+
elif field_type == "date":
|
|
377
|
+
allowed_ops = date_ops
|
|
378
|
+
# type_desc = "date" # Not used
|
|
379
|
+
elif field_type == "ip":
|
|
380
|
+
allowed_ops = ip_ops
|
|
381
|
+
# type_desc = "IP" # Not used
|
|
382
|
+
else:
|
|
383
|
+
# Unknown type, allow all operators
|
|
384
|
+
return
|
|
385
|
+
|
|
386
|
+
# Check if operator is allowed
|
|
387
|
+
if operator not in allowed_ops:
|
|
388
|
+
# Create validation error with proper message
|
|
389
|
+
if field_type == "text" and operator in ["gt", ">", "gte", ">=", "lt", "<", "lte", "<="]:
|
|
390
|
+
pass # Error will be raised with proper message below
|
|
391
|
+
else:
|
|
392
|
+
pass # Error will be raised with proper message below
|
|
393
|
+
|
|
394
|
+
# Use TQLTypeError since it's about operator-field type compatibility
|
|
395
|
+
raise TQLTypeError(field=field, field_type=field_type, operator=operator, valid_operators=list(allowed_ops))
|
|
396
|
+
|
|
397
|
+
def check_performance_issues(self, ast: Dict[str, Any], query: str) -> List[Dict[str, Any]]:
    """Check for potential performance issues in the query.

    Args:
        ast: Parsed AST
        query: Original query string

    Returns:
        List of performance issues found
    """
    # The traversal helper appends findings into the accumulator in place.
    found: List[Dict[str, Any]] = []
    self._traverse_ast_for_performance(ast, found, query)
    return found
|
|
410
|
+
|
|
411
|
+
def _traverse_ast_for_performance(  # noqa: C901
    self, node: Dict[str, Any], issues: List[Dict[str, Any]], query: str
) -> None:
    """Traverse AST looking for performance issues.

    Appends one issue dict per finding to ``issues``. On comparison nodes
    it flags: regexp operators, leading wildcards on string-like fields,
    fuzzy queries with edit distance > 2, 'any'/'all' on keyword fields,
    and ``not_*`` operators on text fields. On logical nodes it flags deep
    OR nesting, then recurses into children.

    Args:
        node: Current AST node; non-dict nodes are ignored.
        issues: Accumulator list that findings are appended to in place.
        query: Original query string (carried through the recursion).
    """
    if not isinstance(node, dict):
        return

    node_type = node.get("type")

    if node_type == "comparison":
        operator = node.get("operator")
        field = node.get("field")
        value = node.get("value", "")

        # Skip if no field or operator
        if not field or not operator:
            return

        # Check for expensive operations
        if operator in ["regexp", "not_regexp"]:
            # Format query part for the issue report
            query_part = f"{field} {operator}"
            if isinstance(value, str):
                # Remove quotes from value for display
                clean_value = value.strip("\"'")
                query_part = f"{field} {operator} {clean_value}"

            issues.append(
                {
                    "type": "expensive_operator",
                    "severity": "warning",
                    "field": field,
                    "operator": operator,
                    "query_part": query_part,
                    "message": "Regular expression (Lucene syntax) operations can be slow on large datasets",
                    "suggestion": "Consider using 'contains' or 'startswith' if possible",
                }
            )

        # Check for wildcard patterns (only on string values)
        if operator in ["=", "eq", "!=", "ne"] and isinstance(value, str):
            # Check for leading wildcard
            if value.startswith("*"):
                # Only flag for text/keyword fields
                field_type = self._get_field_type(field)
                if field_type in ["text", "keyword", None]:  # None means unknown type
                    # Format query part similar to regexp
                    clean_value = value.strip("\"'")
                    query_part = f"{field} {operator} {clean_value}"
                    issues.append(
                        {
                            "type": "leading_wildcard",
                            "severity": "warning",
                            "field": field,
                            "operator": operator,
                            "query_part": query_part,
                            "message": "Leading wildcard in search field can be slow",
                            "suggestion": "Consider using a different search pattern if possible",
                        }
                    )

            # Check for fuzzy search (Lucene "term~N" syntax)
            if "~" in value:
                # Extract fuzzy distance
                parts = value.split("~")
                if len(parts) == 2 and parts[1].isdigit():
                    distance = int(parts[1])
                    if distance > 2:
                        issues.append(
                            {
                                "type": "high_fuzzy_distance",
                                "severity": "warning",
                                "field": field,
                                "operator": operator,
                                "message": f"Fuzzy query with distance {distance} can be expensive",
                                "suggestion": "Consider using fuzzy distance <= 2 for better performance",
                            }
                        )

        # Check for collection operations on high cardinality fields
        if operator in ["any", "all"] and field in self.field_mappings:
            field_info = self.field_mappings[field]
            # Assume keyword fields could be high cardinality
            if isinstance(field_info, str) and field_info == "keyword":
                issues.append(
                    {
                        "type": "collection_operation_high_cardinality",
                        "severity": "warning",
                        "field": field,
                        "operator": operator,
                        "message": f"Collection operator '{operator}' on potentially high cardinality field",
                        "suggestion": "Ensure the field has reasonable cardinality for collection operations",
                    }
                )

        # Check for negated operations on text fields
        if isinstance(operator, str) and operator.startswith("not_") and field in self.field_mappings:
            field_info = self.field_mappings[field]
            if isinstance(field_info, dict) and field_info.get("type") == "text":
                issues.append(
                    {
                        "type": "negated_text_search",
                        "severity": "warning",
                        "field": field,
                        "operator": operator,
                        "message": "Negated operations on text fields can be inefficient",
                        "suggestion": "Consider restructuring the query to use positive matches",
                    }
                )

    elif node_type == "logical_op":
        # Check for deeply nested OR operations
        or_depth = self._count_or_depth(node)
        if or_depth > 3:
            issues.append(
                {
                    "type": "deep_or_nesting",
                    "severity": "warning",
                    "depth": or_depth,
                    "message": f"Query has {or_depth} levels of OR operations which can impact performance",
                    "suggestion": "Consider simplifying the query or using terms queries",
                }
            )

        # Recurse into both children
        left = node.get("left")
        right = node.get("right")
        if left:
            self._traverse_ast_for_performance(left, issues, query)
        if right:
            self._traverse_ast_for_performance(right, issues, query)

    elif node_type == "unary_op":
        operand = node.get("operand")
        if operand:
            self._traverse_ast_for_performance(operand, issues, query)
|
|
547
|
+
|
|
548
|
+
def _count_or_depth(self, node: Dict[str, Any], current_depth: int = 0) -> int:
|
|
549
|
+
"""Count the maximum depth of OR operations."""
|
|
550
|
+
if not isinstance(node, dict):
|
|
551
|
+
return current_depth
|
|
552
|
+
|
|
553
|
+
if node.get("type") == "logical_op" and node.get("operator") == "or":
|
|
554
|
+
left = node.get("left")
|
|
555
|
+
right = node.get("right")
|
|
556
|
+
left_depth = self._count_or_depth(left, current_depth + 1) if left else current_depth
|
|
557
|
+
right_depth = self._count_or_depth(right, current_depth + 1) if right else current_depth
|
|
558
|
+
return max(left_depth, right_depth)
|
|
559
|
+
|
|
560
|
+
return current_depth
|
|
561
|
+
|
|
562
|
+
def _get_field_type(self, field: str) -> Optional[str]:
|
|
563
|
+
"""Get the type of a field from mappings.
|
|
564
|
+
|
|
565
|
+
Args:
|
|
566
|
+
field: Field name
|
|
567
|
+
|
|
568
|
+
Returns:
|
|
569
|
+
Field type or None if unknown
|
|
570
|
+
"""
|
|
571
|
+
if field not in self.field_mappings:
|
|
572
|
+
return None
|
|
573
|
+
|
|
574
|
+
field_info = self.field_mappings[field]
|
|
575
|
+
|
|
576
|
+
if isinstance(field_info, str):
|
|
577
|
+
# Simple type string
|
|
578
|
+
if field_info in [
|
|
579
|
+
"keyword",
|
|
580
|
+
"text",
|
|
581
|
+
"long",
|
|
582
|
+
"integer",
|
|
583
|
+
"short",
|
|
584
|
+
"byte",
|
|
585
|
+
"double",
|
|
586
|
+
"float",
|
|
587
|
+
"boolean",
|
|
588
|
+
"date",
|
|
589
|
+
"ip",
|
|
590
|
+
]:
|
|
591
|
+
return field_info
|
|
592
|
+
else:
|
|
593
|
+
# It's a field name mapping, we don't know the type
|
|
594
|
+
return None
|
|
595
|
+
elif isinstance(field_info, dict):
|
|
596
|
+
# Complex mapping - try to get type
|
|
597
|
+
return field_info.get("type")
|
|
598
|
+
|
|
599
|
+
return None
|