tellaro-query-language 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
  2. tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
  3. tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
  4. tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
  5. tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
  6. tql/__init__.py +47 -0
  7. tql/analyzer.py +385 -0
  8. tql/cache/__init__.py +7 -0
  9. tql/cache/base.py +25 -0
  10. tql/cache/memory.py +63 -0
  11. tql/cache/redis.py +68 -0
  12. tql/core.py +929 -0
  13. tql/core_components/README.md +92 -0
  14. tql/core_components/__init__.py +20 -0
  15. tql/core_components/file_operations.py +113 -0
  16. tql/core_components/opensearch_operations.py +869 -0
  17. tql/core_components/stats_operations.py +200 -0
  18. tql/core_components/validation_operations.py +599 -0
  19. tql/evaluator.py +379 -0
  20. tql/evaluator_components/README.md +131 -0
  21. tql/evaluator_components/__init__.py +17 -0
  22. tql/evaluator_components/field_access.py +176 -0
  23. tql/evaluator_components/special_expressions.py +296 -0
  24. tql/evaluator_components/value_comparison.py +315 -0
  25. tql/exceptions.py +160 -0
  26. tql/geoip_normalizer.py +233 -0
  27. tql/mutator_analyzer.py +830 -0
  28. tql/mutators/__init__.py +222 -0
  29. tql/mutators/base.py +78 -0
  30. tql/mutators/dns.py +316 -0
  31. tql/mutators/encoding.py +218 -0
  32. tql/mutators/geo.py +363 -0
  33. tql/mutators/list.py +212 -0
  34. tql/mutators/network.py +163 -0
  35. tql/mutators/security.py +225 -0
  36. tql/mutators/string.py +165 -0
  37. tql/opensearch.py +78 -0
  38. tql/opensearch_components/README.md +130 -0
  39. tql/opensearch_components/__init__.py +17 -0
  40. tql/opensearch_components/field_mapping.py +399 -0
  41. tql/opensearch_components/lucene_converter.py +305 -0
  42. tql/opensearch_components/query_converter.py +775 -0
  43. tql/opensearch_mappings.py +309 -0
  44. tql/opensearch_stats.py +451 -0
  45. tql/parser.py +1363 -0
  46. tql/parser_components/README.md +72 -0
  47. tql/parser_components/__init__.py +20 -0
  48. tql/parser_components/ast_builder.py +162 -0
  49. tql/parser_components/error_analyzer.py +101 -0
  50. tql/parser_components/field_extractor.py +112 -0
  51. tql/parser_components/grammar.py +473 -0
  52. tql/post_processor.py +737 -0
  53. tql/scripts.py +124 -0
  54. tql/stats_evaluator.py +444 -0
  55. tql/stats_transformer.py +184 -0
  56. tql/validators.py +110 -0
@@ -0,0 +1,599 @@
1
+ """Validation operations for TQL.
2
+
3
+ This module handles query validation, type checking, and performance analysis.
4
+ """
5
+
6
import re
from typing import Any, Dict, List, Optional

from ..exceptions import TQLFieldError, TQLTypeError, TQLValidationError
from ..parser import TQLParser
10
+
11
+
12
class ValidationOperations:
    """Validates TQL queries beyond parsing: field names, operator/type
    compatibility, and potential performance problems.

    A field mapping value may take three shapes:

    * an ES/OpenSearch type string (e.g. ``"keyword"``) -- the field's type;
    * any other string -- a renamed field whose type is unknown;
    * a dict -- an OpenSearch-style mapping, usually carrying a ``"type"`` key.
    """

    # ES/OpenSearch primitive type names recognized in simple string mappings.
    _KNOWN_ES_TYPES = frozenset(
        {"keyword", "text", "long", "integer", "short", "byte", "double", "float", "boolean", "date", "ip"}
    )

    # Numeric ES types; treated as mutually castable for type-hint purposes.
    _NUMERIC_ES_TYPES = frozenset({"long", "integer", "short", "byte", "double", "float"})

    # TQL type hint -> ES type.  Unknown hints fall back to "keyword".
    _TYPE_HINT_TO_ES = {
        "number": "double",
        "int": "long",
        "float": "double",
        "decimal": "double",
        "string": "keyword",
        "text": "text",
        "bool": "boolean",
        "boolean": "boolean",
        "date": "date",
        "array": "keyword",  # arrays don't have a specific type
        "geo": "geo_point",
        "object": "object",
        "ip": "ip",
    }

    # Operator families used to compose the per-type compatibility table.
    _EQUALITY_OPS = {"eq", "=", "ne", "!="}
    _RANGE_OPS = {"gt", ">", "gte", ">=", "lt", "<", "lte", "<="}
    _MEMBERSHIP_OPS = {"in", "not_in"}
    _EXISTENCE_OPS = {"exists", "not_exists"}
    _PATTERN_OPS = {
        "contains",
        "not_contains",
        "startswith",
        "not_startswith",
        "endswith",
        "not_endswith",
        "regexp",
        "not_regexp",
    }

    _NUMERIC_OPS = _EQUALITY_OPS | _RANGE_OPS | _MEMBERSHIP_OPS | _EXISTENCE_OPS | {"between", "not_between"}
    # Analyzed text fields must not use range operators.
    _TEXT_OPS = _EQUALITY_OPS | _PATTERN_OPS | _MEMBERSHIP_OPS | _EXISTENCE_OPS
    # Keyword fields additionally allow lexicographic range comparison.
    _KEYWORD_OPS = _TEXT_OPS | _RANGE_OPS
    _BOOLEAN_OPS = _EQUALITY_OPS | _EXISTENCE_OPS | {"is", "is_not"}
    _DATE_OPS = _EQUALITY_OPS | _RANGE_OPS | _EXISTENCE_OPS | {"between", "not_between"}
    _IP_OPS = _EQUALITY_OPS | _MEMBERSHIP_OPS | _EXISTENCE_OPS | {"cidr", "not_cidr"}

    # Allowed operators per field type; types absent here allow every operator.
    _ALLOWED_OPS_BY_TYPE = {
        "long": _NUMERIC_OPS,
        "integer": _NUMERIC_OPS,
        "short": _NUMERIC_OPS,
        "byte": _NUMERIC_OPS,
        "double": _NUMERIC_OPS,
        "float": _NUMERIC_OPS,
        "text": _TEXT_OPS,
        "keyword": _KEYWORD_OPS,
        "boolean": _BOOLEAN_OPS,
        "date": _DATE_OPS,
        "ip": _IP_OPS,
    }

    # Matches a plain signed int/float literal ("1", "-2.5", ".5").  Unlike the
    # previous isdigit()-based check, this does not misclassify strings such as
    # "1-2" or "2-" as numeric.
    _NUMERIC_LITERAL = re.compile(r"-?(?:\d+\.?\d*|\.\d+)")

    def __init__(self, parser: "TQLParser", field_mappings: Dict[str, Any]):
        """Initialize validation operations.

        Args:
            parser: TQL parser instance (used only by :meth:`validate`).
            field_mappings: Field mapping configuration (see class docstring).
        """
        self.parser = parser
        self.field_mappings = field_mappings

    def validate(self, query: str, validate_fields: bool = False) -> bool:
        """Validate a TQL query for syntax and optionally field names.

        Args:
            query: TQL query string.
            validate_fields: Whether to validate field names against mappings.

        Returns:
            True if the query is valid.

        Raises:
            Various TQL exceptions if parsing or validation fails.
        """
        ast = self.parser.parse(query)

        if validate_fields:
            self._validate_fields_in_ast(ast)

        # Type compatibility is always checked, regardless of validate_fields.
        self._check_type_compatibility(ast)
        return True

    def _validate_fields_in_ast(self, ast: Dict[str, Any]) -> None:
        """Recursively validate field names in the AST against field mappings.

        Raises:
            TQLFieldError: If a comparison references an unmapped field.
        """
        if not isinstance(ast, dict):
            return

        node_type = ast.get("type")
        if node_type == "comparison" and "field" in ast:
            field = ast["field"]
            if field not in self.field_mappings:
                raise TQLFieldError(field=field, available_fields=sorted(self.field_mappings.keys()))
        elif node_type == "logical_op":
            for child in (ast.get("left"), ast.get("right")):
                if child:
                    self._validate_fields_in_ast(child)
        elif node_type == "unary_op":
            operand = ast.get("operand")
            if operand:
                self._validate_fields_in_ast(operand)

    def _check_type_compatibility(self, ast: Dict[str, Any]) -> None:
        """Check type compatibility between fields, operators, and values.

        Args:
            ast: The AST (sub)tree to validate.

        Raises:
            TQLTypeError: If type incompatibilities are found.
            TQLValidationError: If a type hint conflicts with the mapped type.
        """
        if not isinstance(ast, dict):
            return

        node_type = ast.get("type")

        if node_type == "comparison":
            field = ast.get("field")
            operator = ast.get("operator")
            value = ast.get("value")
            type_hint = ast.get("type_hint")

            # Only proceed if we have the required pieces.
            if not field or not operator:
                return

            # "value in [field1, field2, ...]" was expanded by the parser and
            # records its member fields in "original_fields"; check each one.
            if operator == "in" and isinstance(ast.get("original_fields"), list):
                for member in ast["original_fields"]:
                    member_info = self.field_mappings.get(member)
                    if member_info:
                        self._validate_comparison_for_field(member, member_info, operator, value, type_hint)
                return

            field_info = self.field_mappings.get(field)
            if field_info:
                self._validate_comparison_for_field(field, field_info, operator, value, type_hint)

        elif node_type == "logical_op":
            for child in (ast.get("left"), ast.get("right")):
                if child:
                    self._check_type_compatibility(child)

        elif node_type == "unary_op":
            operand = ast.get("operand")
            if operand:
                self._check_type_compatibility(operand)

        elif node_type == "query_with_stats":
            if "filter" in ast:
                self._check_type_compatibility(ast["filter"])

    def _resolve_field_type(self, field: str, field_info: Any) -> Optional[str]:
        """Determine a field's ES type from its mapping entry, or None if unknown.

        Args:
            field: Field name (used to look up OpenSearch-style nested mappings,
                e.g. ``{"message": {"type": "text", "analyzer": "standard"}}``).
            field_info: The mapping entry for the field.
        """
        if isinstance(field_info, str):
            # A bare string is either an ES type or a mapped field name (type unknown).
            return field_info if field_info in self._KNOWN_ES_TYPES else None
        if isinstance(field_info, dict):
            field_type = field_info.get("type")
            if field_type:
                return field_type
            nested = field_info.get(field)
            if isinstance(nested, dict):
                return nested.get("type")
            if "fields" in field_info:
                # Multi-field mapping: validate against the main field type.
                return field_info.get("type", "text")
        return None

    def _validate_comparison_for_field(
        self, field: str, field_info: Any, operator: str, value: Any, type_hint: Optional[str]
    ) -> None:
        """Validate a comparison for a specific field.

        Args:
            field: Field name.
            field_info: Field mapping information.
            operator: Comparison operator.
            value: Comparison value (the parser yields all values as strings).
            type_hint: Optional type hint from the query.

        Raises:
            TQLTypeError: If the operator or value is incompatible with the type.
            TQLValidationError: If the type hint conflicts with the mapped type.
        """
        field_type = self._resolve_field_type(field, field_info)
        if not field_type:
            return  # No type information available; nothing to validate.

        if type_hint:
            self._validate_type_hint_compatibility(field, field_type, type_hint)
            # The hint overrides the mapped type for the remaining checks.
            validation_type = self._map_type_hint_to_es_type(type_hint)
        else:
            validation_type = field_type

        if validation_type:
            self._validate_simple_type_compatibility(field, validation_type, operator)

        # A range comparison on a string field against a numeric-looking literal
        # is almost certainly a type mistake: it would compare lexicographically.
        if (
            validation_type in ("text", "keyword")
            and operator in self._RANGE_OPS
            and isinstance(value, str)
            and self._NUMERIC_LITERAL.fullmatch(value)
        ):
            raise TQLTypeError(
                field=field,
                field_type=validation_type,
                operator=operator,
                suggestions=["Use a string value for comparison, or use a numeric field"],
            )

    def _validate_type_hint_compatibility(self, field: str, field_type: str, type_hint: str) -> None:
        """Validate that a type hint is compatible with the field's actual type.

        Raises:
            TQLValidationError: If the hint cannot be applied to the field type.
        """
        hint_es_type = self._map_type_hint_to_es_type(type_hint)
        string_types = {"text", "keyword"}

        compatible = (
            # Numeric types can be cast between each other.
            (field_type in self._NUMERIC_ES_TYPES and hint_es_type in self._NUMERIC_ES_TYPES)
            or (field_type in string_types and hint_es_type in string_types)
            or (field_type in {"object", "nested"} and hint_es_type == "object")
            # boolean/date/ip/geo_point only match themselves.
            or (field_type == hint_es_type and field_type in {"boolean", "date", "ip", "geo_point"})
        )

        if not compatible:
            # TQLValidationError is used here because TQLTypeError requires an operator.
            raise TQLValidationError(
                f"Type hint '{type_hint}' is incompatible with field '{field}' of type '{field_type}'"
            )

    def _map_type_hint_to_es_type(self, type_hint: str) -> str:
        """Map a TQL type hint to an Elasticsearch/OpenSearch type."""
        return self._TYPE_HINT_TO_ES.get(type_hint.lower(), "keyword")

    def _validate_simple_type_compatibility(self, field: str, field_type: str, operator: str) -> None:
        """Validate operator compatibility with a field type.

        Args:
            field: Field name for error messages.
            field_type: The field's data type.
            operator: The operator being used.

        Raises:
            TQLTypeError: If the operator is incompatible with the field type.
        """
        allowed_ops = self._ALLOWED_OPS_BY_TYPE.get(field_type)
        if allowed_ops is None:
            return  # Unknown type: allow all operators.

        if operator not in allowed_ops:
            raise TQLTypeError(
                field=field, field_type=field_type, operator=operator, valid_operators=sorted(allowed_ops)
            )

    def check_performance_issues(self, ast: Dict[str, Any], query: str) -> List[Dict[str, Any]]:
        """Check for potential performance issues in the query.

        Args:
            ast: Parsed AST.
            query: Original query string.

        Returns:
            List of performance issues found (each a dict with at least
            ``type``, ``severity``, ``message`` and ``suggestion`` keys).
        """
        issues: List[Dict[str, Any]] = []
        self._traverse_ast_for_performance(ast, issues, query)
        return issues

    def _traverse_ast_for_performance(
        self, node: Dict[str, Any], issues: List[Dict[str, Any]], query: str
    ) -> None:
        """Traverse the AST, appending any performance issues to ``issues``."""
        if not isinstance(node, dict):
            return

        node_type = node.get("type")

        if node_type == "comparison":
            self._check_comparison_performance(node, issues)

        elif node_type == "logical_op":
            # Deeply nested OR chains expand into expensive boolean queries.
            or_depth = self._count_or_depth(node)
            if or_depth > 3:
                issues.append(
                    {
                        "type": "deep_or_nesting",
                        "severity": "warning",
                        "depth": or_depth,
                        "message": f"Query has {or_depth} levels of OR operations which can impact performance",
                        "suggestion": "Consider simplifying the query or using terms queries",
                    }
                )
            for child in (node.get("left"), node.get("right")):
                if child:
                    self._traverse_ast_for_performance(child, issues, query)

        elif node_type == "unary_op":
            operand = node.get("operand")
            if operand:
                self._traverse_ast_for_performance(operand, issues, query)

    def _check_comparison_performance(self, node: Dict[str, Any], issues: List[Dict[str, Any]]) -> None:
        """Inspect a single comparison node for expensive query patterns."""
        operator = node.get("operator")
        field = node.get("field")
        value = node.get("value", "")

        if not field or not operator:
            return

        # Regular expressions are expensive to evaluate server-side.
        if operator in ("regexp", "not_regexp"):
            query_part = f"{field} {operator}"
            if isinstance(value, str):
                clean_value = value.strip("\"'")  # strip quotes for display only
                query_part = f"{field} {operator} {clean_value}"
            issues.append(
                {
                    "type": "expensive_operator",
                    "severity": "warning",
                    "field": field,
                    "operator": operator,
                    "query_part": query_part,
                    "message": "Regular expression (Lucene syntax) operations can be slow on large datasets",
                    "suggestion": "Consider using 'contains' or 'startswith' if possible",
                }
            )

        if operator in ("=", "eq", "!=", "ne") and isinstance(value, str):
            # Leading wildcards defeat index prefix optimization on string fields.
            if value.startswith("*"):
                field_type = self._get_field_type(field)
                if field_type in ("text", "keyword", None):  # None means unknown type
                    clean_value = value.strip("\"'")
                    issues.append(
                        {
                            "type": "leading_wildcard",
                            "severity": "warning",
                            "field": field,
                            "operator": operator,
                            "query_part": f"{field} {operator} {clean_value}",
                            "message": "Leading wildcard in search field can be slow",
                            "suggestion": "Consider using a different search pattern if possible",
                        }
                    )

            # "term~N" fuzzy syntax: large edit distances are expensive.
            if "~" in value:
                parts = value.split("~")
                if len(parts) == 2 and parts[1].isdigit():
                    distance = int(parts[1])
                    if distance > 2:
                        issues.append(
                            {
                                "type": "high_fuzzy_distance",
                                "severity": "warning",
                                "field": field,
                                "operator": operator,
                                "message": f"Fuzzy query with distance {distance} can be expensive",
                                "suggestion": "Consider using fuzzy distance <= 2 for better performance",
                            }
                        )

        # Collection operators over keyword fields, which may have high cardinality.
        if operator in ("any", "all"):
            if self.field_mappings.get(field) == "keyword":
                issues.append(
                    {
                        "type": "collection_operation_high_cardinality",
                        "severity": "warning",
                        "field": field,
                        "operator": operator,
                        "message": f"Collection operator '{operator}' on potentially high cardinality field",
                        "suggestion": "Ensure the field has reasonable cardinality for collection operations",
                    }
                )

        # Negated matches on analyzed text fields force broad scans.
        if isinstance(operator, str) and operator.startswith("not_"):
            field_info = self.field_mappings.get(field)
            if isinstance(field_info, dict) and field_info.get("type") == "text":
                issues.append(
                    {
                        "type": "negated_text_search",
                        "severity": "warning",
                        "field": field,
                        "operator": operator,
                        "message": "Negated operations on text fields can be inefficient",
                        "suggestion": "Consider restructuring the query to use positive matches",
                    }
                )

    def _count_or_depth(self, node: Dict[str, Any], current_depth: int = 0) -> int:
        """Return the maximum nesting depth of OR operations under ``node``."""
        if not isinstance(node, dict):
            return current_depth

        if node.get("type") == "logical_op" and node.get("operator") == "or":
            left = node.get("left")
            right = node.get("right")
            left_depth = self._count_or_depth(left, current_depth + 1) if left else current_depth
            right_depth = self._count_or_depth(right, current_depth + 1) if right else current_depth
            return max(left_depth, right_depth)

        return current_depth

    def _get_field_type(self, field: str) -> Optional[str]:
        """Get the type of a field from the mappings.

        Args:
            field: Field name.

        Returns:
            The ES type string, or None if the type is unknown.
        """
        field_info = self.field_mappings.get(field)

        if isinstance(field_info, str):
            # A bare string is either an ES type or a mapped field name (type unknown).
            return field_info if field_info in self._KNOWN_ES_TYPES else None
        if isinstance(field_info, dict):
            return field_info.get("type")
        return None