tellaro-query-language 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
  2. tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
  3. tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
  4. tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
  5. tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
  6. tql/__init__.py +47 -0
  7. tql/analyzer.py +385 -0
  8. tql/cache/__init__.py +7 -0
  9. tql/cache/base.py +25 -0
  10. tql/cache/memory.py +63 -0
  11. tql/cache/redis.py +68 -0
  12. tql/core.py +929 -0
  13. tql/core_components/README.md +92 -0
  14. tql/core_components/__init__.py +20 -0
  15. tql/core_components/file_operations.py +113 -0
  16. tql/core_components/opensearch_operations.py +869 -0
  17. tql/core_components/stats_operations.py +200 -0
  18. tql/core_components/validation_operations.py +599 -0
  19. tql/evaluator.py +379 -0
  20. tql/evaluator_components/README.md +131 -0
  21. tql/evaluator_components/__init__.py +17 -0
  22. tql/evaluator_components/field_access.py +176 -0
  23. tql/evaluator_components/special_expressions.py +296 -0
  24. tql/evaluator_components/value_comparison.py +315 -0
  25. tql/exceptions.py +160 -0
  26. tql/geoip_normalizer.py +233 -0
  27. tql/mutator_analyzer.py +830 -0
  28. tql/mutators/__init__.py +222 -0
  29. tql/mutators/base.py +78 -0
  30. tql/mutators/dns.py +316 -0
  31. tql/mutators/encoding.py +218 -0
  32. tql/mutators/geo.py +363 -0
  33. tql/mutators/list.py +212 -0
  34. tql/mutators/network.py +163 -0
  35. tql/mutators/security.py +225 -0
  36. tql/mutators/string.py +165 -0
  37. tql/opensearch.py +78 -0
  38. tql/opensearch_components/README.md +130 -0
  39. tql/opensearch_components/__init__.py +17 -0
  40. tql/opensearch_components/field_mapping.py +399 -0
  41. tql/opensearch_components/lucene_converter.py +305 -0
  42. tql/opensearch_components/query_converter.py +775 -0
  43. tql/opensearch_mappings.py +309 -0
  44. tql/opensearch_stats.py +451 -0
  45. tql/parser.py +1363 -0
  46. tql/parser_components/README.md +72 -0
  47. tql/parser_components/__init__.py +20 -0
  48. tql/parser_components/ast_builder.py +162 -0
  49. tql/parser_components/error_analyzer.py +101 -0
  50. tql/parser_components/field_extractor.py +112 -0
  51. tql/parser_components/grammar.py +473 -0
  52. tql/post_processor.py +737 -0
  53. tql/scripts.py +124 -0
  54. tql/stats_evaluator.py +444 -0
  55. tql/stats_transformer.py +184 -0
  56. tql/validators.py +110 -0
@@ -0,0 +1,176 @@
1
+ """Field access utilities for TQL evaluator.
2
+
3
+ This module provides utilities for accessing nested fields in records,
4
+ handling field mappings, and type conversions.
5
+ """
6
+
7
+ from typing import Any, Dict
8
+
9
+
10
class FieldAccessor:
    """Handles field access operations for TQL evaluation.

    Provides three independent services: walking a dot-separated path through
    a (possibly nested) record, translating a query field name through a
    field-mapping table, and coercing a value according to a type hint.
    """

    # Sentinel value to distinguish missing fields from None values
    _MISSING_FIELD = object()

    # Mapping values that name a field *type* rather than a target field name.
    _TYPE_NAMES = frozenset(
        {
            "keyword",
            "text",
            "long",
            "integer",
            "short",
            "byte",
            "double",
            "float",
            "boolean",
            "date",
            "ip",
        }
    )

    # Keys of a dict-style mapping that describe the field instead of naming one.
    _META_KEYS = ("analyzer", "type", "fields")

    def get_field_value(self, record: Dict[str, Any], field_path: str) -> Any:
        """Get a field value from a record, supporting nested field access.

        Each dot-separated part is looked up as a key when the current value
        is a dict, or used as a zero-based index when the current value is a
        list/tuple and the part is all digits (e.g. "items.0").

        Args:
            record: The record dictionary
            field_path: Dot-separated field path (e.g., "user.name")

        Returns:
            The field value, or ``_MISSING_FIELD`` if any part of the path
            cannot be resolved. A stored ``None`` is returned as ``None``.
        """
        current: Any = record

        for part in field_path.split("."):
            if isinstance(current, dict):
                if part not in current:
                    return self._MISSING_FIELD
                current = current[part]
            elif isinstance(current, (list, tuple)) and part.isdigit():
                # str.isdigit() also accepts characters int() rejects
                # (e.g. superscript digits), hence the guard.
                try:
                    index = int(part)
                except ValueError:
                    return self._MISSING_FIELD
                if not 0 <= index < len(current):
                    return self._MISSING_FIELD
                current = current[index]
            else:
                # Scalar reached before the path was exhausted, or a
                # non-numeric part applied to a sequence.
                return self._MISSING_FIELD

        return current

    def apply_field_mapping(self, field_name: str, field_mappings: Dict[str, Any]) -> str:
        """Apply field mapping to get the actual field name.

        Args:
            field_name: Original field name from query
            field_mappings: Field mapping configuration. A value may be a
                string (either a target field name or a type name such as
                "keyword") or a dict whose non-meta keys name target fields.

        Returns:
            Mapped field name, or ``field_name`` itself when the mapping is
            absent, only specifies a type, or names no target field.
        """
        if field_name not in field_mappings:
            return field_name

        mapping = field_mappings[field_name]

        if isinstance(mapping, str):
            # A type name says nothing about where the data lives.
            return field_name if mapping in self._TYPE_NAMES else mapping

        if isinstance(mapping, dict):
            # First key that is not a meta key wins; a mapping made only of
            # meta keys (e.g. {"type": "text"}) keeps the original name.
            return next((key for key in mapping if key not in self._META_KEYS), field_name)

        return field_name

    @staticmethod
    def _tql_error(message: str) -> Exception:
        """Build a TQLError, importing it only when a failure actually occurs."""
        from ..exceptions import TQLError

        return TQLError(message)

    def apply_type_hint(  # noqa: C901
        self, value: Any, type_hint: str, field_name: str, operator: str, field_mappings: Dict[str, Any]
    ) -> Any:
        """Apply type hint to convert value to the appropriate type.

        Args:
            value: Value to convert
            type_hint: Type hint ("ip", "integer", "float", "boolean"/"bool"
                or "string"); any other hint leaves the value untouched
            field_name: Field name for error messages
            operator: Operator being used; "cidr"/"not_cidr" additionally
                allow network notation for the "ip" hint
            field_mappings: Field mappings (not used by this method)

        Returns:
            Converted value. ``None`` and ``_MISSING_FIELD`` pass through
            unchanged.

        Raises:
            TQLError: If conversion fails
        """
        if value is None or value is self._MISSING_FIELD:
            return value

        if type_hint == "ip":
            import ipaddress

            text = str(value)
            try:
                # Parsed only to validate; comparison happens on the string.
                ipaddress.ip_address(text)
            except ValueError as exc:
                if operator in ("cidr", "not_cidr"):
                    try:
                        ipaddress.ip_network(text, strict=False)
                    except ValueError:
                        pass
                    else:
                        return text
                raise self._tql_error(f"Invalid IP address format for field '{field_name}': {value}") from exc
            return text

        if type_hint == "integer":
            try:
                return int(value)
            except (ValueError, TypeError) as exc:
                raise self._tql_error(
                    f"Cannot convert value to integer for field '{field_name}': {value}"
                ) from exc

        if type_hint == "float":
            try:
                return float(value)
            except (ValueError, TypeError) as exc:
                raise self._tql_error(f"Cannot convert value to float for field '{field_name}': {value}") from exc

        if type_hint in ("boolean", "bool"):
            if isinstance(value, bool):
                return value
            if isinstance(value, str):
                if value.lower() == "true" or value == "1":
                    return True
                if value.lower() == "false" or value == "0":
                    return False
            # Any other string, and every non-bool/non-str value, is rejected.
            raise self._tql_error(f"Cannot convert value to boolean for field '{field_name}': {value}")

        if type_hint == "string":
            return str(value)

        # Unknown type hint, return value as-is
        return value
@@ -0,0 +1,296 @@
1
+ """Special expression evaluators for TQL.
2
+
3
+ This module handles evaluation of special expressions like geo() and nslookup()
4
+ that require external lookups or enrichment.
5
+ """
6
+
7
+ from typing import Any, Dict, Optional
8
+
9
+ from ..mutators import apply_mutators
10
+
11
+
12
class SpecialExpressionEvaluator:
    """Evaluates special TQL expressions like geo() and nslookup().

    Both expressions follow the same shape: resolve the field, reuse
    enrichment data already present on the record when allowed, otherwise run
    the corresponding lookup mutator, then evaluate the expression's
    conditions against the enrichment data.
    """

    # Sentinel value to distinguish missing fields from None values
    # NOTE(review): this object is local to this class. The identity checks
    # below only match if get_field_value_func returns this same object, e.g.
    # because the owner assigns a shared sentinel onto the instance. Confirm
    # at the construction site.
    _MISSING_FIELD = object()

    # Mapping values that name a field *type* rather than a target field name.
    _TYPE_NAMES = frozenset(
        {
            "keyword",
            "text",
            "long",
            "integer",
            "short",
            "byte",
            "double",
            "float",
            "boolean",
            "date",
            "ip",
        }
    )

    # Keys of a dict-style mapping that describe the field instead of naming one.
    _META_KEYS = ("analyzer", "type", "fields")

    # A dict found at a custom location is only trusted as enrichment data
    # when it carries at least one of these keys.
    _GEO_MARKER_KEYS = ("country_iso_code", "city_name", "location")
    _DNS_MARKER_KEYS = ("question", "answers", "resolved_ip")

    def __init__(self, get_field_value_func, evaluate_node_func):
        """Initialize the special expression evaluator.

        Args:
            get_field_value_func: Function to get field values from records;
                called as ``func(record, field_path)``
            evaluate_node_func: Function to evaluate AST nodes; called as
                ``func(node, record, field_mappings)``
        """
        self._get_field_value = get_field_value_func
        self._evaluate_node = evaluate_node_func

    def _resolve_field(self, field_name: str, field_mappings: Dict[str, Any]) -> str:
        """Translate a query field name through the mapping table."""
        if field_name not in field_mappings:
            return field_name

        mapping = field_mappings[field_name]
        if isinstance(mapping, str):
            # A type name says nothing about where the data lives.
            return field_name if mapping in self._TYPE_NAMES else mapping
        if isinstance(mapping, dict):
            # First key that is not a meta key wins; a mapping made only of
            # meta keys keeps the original name.
            return next((key for key in mapping if key not in self._META_KEYS), field_name)
        return field_name

    @staticmethod
    def _build_lookup_mutator(name: str, params: Dict[str, Any], force_lookup: Any) -> Dict[str, Any]:
        """Build the mutator spec for a lookup, always carrying a "force" parameter."""
        mutator_params = [[param_name, param_value] for param_name, param_value in params.items()]
        if "force" not in params:
            mutator_params.append(["force", force_lookup])
        return {"name": name, "params": mutator_params}

    def _find_existing_geo(
        self, record: Dict[str, Any], actual_field: str, custom_field: Any
    ) -> Optional[Dict[str, Any]]:
        """Return geo data already stored on the record, shaped as {"geo": ..., "as": ...}, or None."""
        if custom_field:
            custom_data = self._get_field_value(record, custom_field)
            if custom_data is self._MISSING_FIELD or not isinstance(custom_data, dict):
                return None
            if not any(key in custom_data for key in self._GEO_MARKER_KEYS):
                return None

            geo_data: Dict[str, Any] = {"geo": custom_data}
            # AS data, when present, lives next to the geo data.
            if "." in custom_field:
                as_parent = self._get_field_value(record, custom_field.rsplit(".", 1)[0])
                if isinstance(as_parent, dict) and "as" in as_parent:
                    geo_data["as"] = as_parent["as"]
            elif "as" in record:
                geo_data["as"] = record["as"]
            return geo_data

        if "." in actual_field:
            # For nested fields like destination.ip, check destination.geo
            parent = self._get_field_value(record, actual_field.rsplit(".", 1)[0])
            if isinstance(parent, dict) and "geo" in parent:
                return parent
            return None

        # For top-level fields, check enrichment.geo
        enrichment = record.get("enrichment")
        if isinstance(enrichment, dict) and "geo" in enrichment:
            return enrichment
        return None

    def _find_existing_dns(self, record: Dict[str, Any], field_name: str, custom_field: Any) -> Any:
        """Return DNS data already stored on the record, or None."""
        if custom_field:
            custom_data = self._get_field_value(record, custom_field)
            if (
                custom_data is not self._MISSING_FIELD
                and isinstance(custom_data, dict)
                and any(key in custom_data for key in self._DNS_MARKER_KEYS)
            ):
                return custom_data
            return None

        if "." in field_name:
            # Nested field like destination.ip: DNS data is expected under
            # the sibling key "domain" (destination.domain).
            parent: Any = record
            for part in field_name.rsplit(".", 1)[0].split("."):
                if isinstance(parent, dict) and part in parent:
                    parent = parent[part]
                else:
                    return None
            if isinstance(parent, dict) and "domain" in parent:
                return parent["domain"]
            return None

        # Top-level field - check enrichment.domain
        enrichment = record.get("enrichment")
        if isinstance(enrichment, dict) and "domain" in enrichment:
            return enrichment["domain"]
        return None

    def evaluate_geo_expr(  # noqa: C901
        self, node: Dict[str, Any], record: Dict[str, Any], field_mappings: Dict[str, Any]
    ) -> bool:
        """Evaluate a geo() expression.

        Args:
            node: Geo expression AST node; reads "field", "conditions" and the
                optional "field_mutators" and "geo_params" entries
            record: Record to evaluate against (not modified by this method
                itself)
            field_mappings: Field name mappings

        Returns:
            False when the field is missing or None. Otherwise the result of
            evaluating the conditions against the geo data, or, without
            conditions, whether geo data containing a "geo" key is available.
        """
        field_name = node["field"]
        field_mutators = node.get("field_mutators", [])
        conditions = node["conditions"]
        geo_params = node.get("geo_params", {})

        actual_field = self._resolve_field(field_name, field_mappings)

        # Get the field value (IP address)
        field_value = self._get_field_value(record, actual_field)
        if field_value is self._MISSING_FIELD or field_value is None:
            return False

        # Prefer geo data already on the record (from post-processing)
        # unless the expression forces a fresh lookup.
        geo_data: Any = self._find_existing_geo(record, actual_field, geo_params.get("field"))
        force_lookup = geo_params.get("force", False)

        if not geo_data or force_lookup:
            # Apply any field mutators before the geo lookup
            if field_mutators:
                field_value = apply_mutators(field_value, field_mutators, actual_field, record)
            geo_mutator = self._build_lookup_mutator("geoip_lookup", geo_params, force_lookup)
            geo_data = apply_mutators(field_value, [geo_mutator], actual_field, record)

        if not conditions:
            # No conditions, just checking if geo lookup succeeded
            return bool(geo_data and "geo" in geo_data)

        # Handle None geo_data (e.g., private IPs or lookup failures)
        if geo_data is None:
            geo_data = {}

        # Conditions are evaluated against the geo fields directly. Copy them:
        # the dict may be the record's own geo data, and adding "as" to it in
        # place would modify the caller's record.
        geo_fields = geo_data.get("geo", {})
        temp_record = dict(geo_fields) if isinstance(geo_fields, dict) else {}
        if "as" in geo_data:
            temp_record["as"] = geo_data["as"]
        return self._evaluate_node(conditions, temp_record, {})

    def evaluate_nslookup_expr(  # noqa: C901
        self, node: Dict[str, Any], record: Dict[str, Any], field_mappings: Dict[str, Any]
    ) -> bool:
        """Evaluate a nslookup() expression.

        Args:
            node: NSLookup expression AST node; reads "field", "conditions"
                and the optional "field_mutators" and "nslookup_params" entries
            record: Record to evaluate against
            field_mappings: Field name mappings

        Returns:
            False when the field is missing or None. Otherwise the result of
            evaluating the conditions against the DNS data, or, without
            conditions, whether any DNS data is available.
        """
        field_name = node["field"]
        field_mutators = node.get("field_mutators", [])
        conditions = node["conditions"]
        nslookup_params = node.get("nslookup_params", {})

        actual_field = self._resolve_field(field_name, field_mappings)

        # Get the field value (hostname or IP)
        field_value = self._get_field_value(record, actual_field)
        if field_value is self._MISSING_FIELD or field_value is None:
            return False

        # NOTE(review): the default-location search and the mutator calls use
        # the unmapped field_name, whereas evaluate_geo_expr uses the mapped
        # name. Kept as-is; confirm which one is intended.
        dns_data = self._find_existing_dns(record, field_name, nslookup_params.get("field"))
        force_lookup = nslookup_params.get("force", False)

        if not dns_data or force_lookup:
            # Apply any field mutators before the nslookup
            if field_mutators:
                field_value = apply_mutators(field_value, field_mutators, field_name, record)
            nslookup_mutator = self._build_lookup_mutator("nslookup", nslookup_params, force_lookup)
            dns_data = apply_mutators(field_value, [nslookup_mutator], field_name, record)

        if not conditions:
            # No conditions, just checking if nslookup succeeded
            return bool(dns_data)

        # Conditions see the DNS data at root level.
        temp_record = dns_data if dns_data else {}
        return self._evaluate_node(conditions, temp_record, {})