tellaro-query-language 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
  2. tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
  3. tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
  4. tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
  5. tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
  6. tql/__init__.py +47 -0
  7. tql/analyzer.py +385 -0
  8. tql/cache/__init__.py +7 -0
  9. tql/cache/base.py +25 -0
  10. tql/cache/memory.py +63 -0
  11. tql/cache/redis.py +68 -0
  12. tql/core.py +929 -0
  13. tql/core_components/README.md +92 -0
  14. tql/core_components/__init__.py +20 -0
  15. tql/core_components/file_operations.py +113 -0
  16. tql/core_components/opensearch_operations.py +869 -0
  17. tql/core_components/stats_operations.py +200 -0
  18. tql/core_components/validation_operations.py +599 -0
  19. tql/evaluator.py +379 -0
  20. tql/evaluator_components/README.md +131 -0
  21. tql/evaluator_components/__init__.py +17 -0
  22. tql/evaluator_components/field_access.py +176 -0
  23. tql/evaluator_components/special_expressions.py +296 -0
  24. tql/evaluator_components/value_comparison.py +315 -0
  25. tql/exceptions.py +160 -0
  26. tql/geoip_normalizer.py +233 -0
  27. tql/mutator_analyzer.py +830 -0
  28. tql/mutators/__init__.py +222 -0
  29. tql/mutators/base.py +78 -0
  30. tql/mutators/dns.py +316 -0
  31. tql/mutators/encoding.py +218 -0
  32. tql/mutators/geo.py +363 -0
  33. tql/mutators/list.py +212 -0
  34. tql/mutators/network.py +163 -0
  35. tql/mutators/security.py +225 -0
  36. tql/mutators/string.py +165 -0
  37. tql/opensearch.py +78 -0
  38. tql/opensearch_components/README.md +130 -0
  39. tql/opensearch_components/__init__.py +17 -0
  40. tql/opensearch_components/field_mapping.py +399 -0
  41. tql/opensearch_components/lucene_converter.py +305 -0
  42. tql/opensearch_components/query_converter.py +775 -0
  43. tql/opensearch_mappings.py +309 -0
  44. tql/opensearch_stats.py +451 -0
  45. tql/parser.py +1363 -0
  46. tql/parser_components/README.md +72 -0
  47. tql/parser_components/__init__.py +20 -0
  48. tql/parser_components/ast_builder.py +162 -0
  49. tql/parser_components/error_analyzer.py +101 -0
  50. tql/parser_components/field_extractor.py +112 -0
  51. tql/parser_components/grammar.py +473 -0
  52. tql/post_processor.py +737 -0
  53. tql/scripts.py +124 -0
  54. tql/stats_evaluator.py +444 -0
  55. tql/stats_transformer.py +184 -0
  56. tql/validators.py +110 -0
@@ -0,0 +1,451 @@
+ """OpenSearch stats translator for TQL.
+
+ This module translates TQL stats queries to OpenSearch aggregation DSL.
+ """
+
+ from typing import Any, Dict, List, Optional, Union
+
+ from .exceptions import TQLError
+
+
+ class OpenSearchStatsTranslator:
+     """Translates TQL stats AST to OpenSearch aggregations."""
+
+     # Map TQL aggregation functions to OpenSearch aggregation types
+     AGGREGATION_MAPPING = {
+         "count": "value_count",
+         "unique_count": "cardinality",
+         "sum": "sum",
+         "min": "min",
+         "max": "max",
+         "average": "avg",
+         "avg": "avg",
+         "median": "percentiles",
+         "med": "percentiles",
+         "std": "extended_stats",  # Will extract std_deviation
+         "standard_deviation": "extended_stats",
+         "percentile": "percentiles",
+         "percentiles": "percentiles",
+         "p": "percentiles",
+         "pct": "percentiles",
+         "percentile_rank": "percentile_ranks",
+         "percentile_ranks": "percentile_ranks",
+         "pct_rank": "percentile_ranks",
+         "pct_ranks": "percentile_ranks",
+         "zscore": None,  # Requires post-processing
+     }
+
+     # Aggregations that require numeric fields
+     NUMERIC_AGGREGATIONS = {
+         "sum",
+         "min",
+         "max",
+         "average",
+         "avg",
+         "median",
+         "med",
+         "std",
+         "standard_deviation",
+         "zscore",
+         "percentile",
+         "percentiles",
+         "p",
+         "pct",
+         "percentile_rank",
+         "percentile_ranks",
+         "pct_rank",
+         "pct_ranks",
+     }
+
+     def translate_stats(
+         self, stats_ast: Dict[str, Any], field_mappings: Optional[Dict[str, str]] = None
+     ) -> Dict[str, Any]:
+         """Translate stats AST to OpenSearch aggregations.
+
+         Args:
+             stats_ast: Stats AST from parser
+             field_mappings: Optional field type mappings
+
+         Returns:
+             OpenSearch aggregation DSL
+         """
+         aggregations = stats_ast.get("aggregations", [])
+         group_by_fields = stats_ast.get("group_by", [])
+
+         if not aggregations:
+             raise TQLError("No aggregations specified in stats query")
+
+         # Build OpenSearch aggregations
+         aggs_dsl = {}
+
+         if group_by_fields:
+             # Build nested terms aggregations for grouping
+             aggs_dsl = self._build_grouped_aggregations(aggregations, group_by_fields, field_mappings)
+         else:
+             # Simple aggregations without grouping
+             aggs_dsl = self._build_simple_aggregations(aggregations, field_mappings)
+
+         return {"aggs": aggs_dsl}
+
+     def _build_simple_aggregations(  # noqa: C901
+         self, aggregations: List[Dict[str, Any]], field_mappings: Optional[Dict[str, str]] = None
+     ) -> Dict[str, Any]:
+         """Build simple aggregations without grouping.
+
+         Args:
+             aggregations: List of aggregation specifications
+             field_mappings: Optional field mappings
+
+         Returns:
+             OpenSearch aggregations DSL
+         """
+         aggs_dsl: Dict[str, Any] = {}
+
+         for i, agg in enumerate(aggregations):
+             func = agg["function"]
+             field = agg["field"]
+             alias = agg.get("alias") or f"{func}_{field}_{i}"
+
+             # Validate field type if mappings provided
+             if field_mappings and func in self.NUMERIC_AGGREGATIONS and field != "*":
+                 self._validate_numeric_field(field, func, field_mappings)
+
+             # Build aggregation based on function
+             if func == "count" and field == "*":
+                 # Special case for count(*)
+                 aggs_dsl[alias] = {"value_count": {"field": "_id"}}
+             elif func in self.AGGREGATION_MAPPING:
+                 os_agg_type = self.AGGREGATION_MAPPING[func]
+
+                 if os_agg_type is None:
+                     # Functions that require post-processing
+                     raise TQLError(
+                         f"Aggregation function '{func}' requires post-processing",
+                         suggestions=[
+                             "This function is not directly supported by OpenSearch",
+                             "Consider using a different function or processing results client-side",
+                         ],
+                     )
+
+                 # Build aggregation
+                 if func in ["median", "med"]:
+                     # Median uses percentiles aggregation at 50th percentile
+                     aggs_dsl[alias] = {"percentiles": {"field": field, "percents": ["50"]}}
+                 elif func in ["std", "standard_deviation"]:
+                     # Standard deviation uses extended_stats
+                     aggs_dsl[alias] = {"extended_stats": {"field": field}}
+                 elif func in ["percentile", "percentiles", "p", "pct"]:
+                     # Percentiles aggregation with custom values
+                     percentile_values = agg.get("percentile_values", [50])
+                     # Convert to strings for OpenSearch
+                     percents = [str(p) for p in percentile_values]
+                     aggs_dsl[alias] = {"percentiles": {"field": field, "percents": percents}}
+                 elif func in ["percentile_rank", "percentile_ranks", "pct_rank", "pct_ranks"]:
+                     # Percentile ranks aggregation
+                     rank_values = agg.get("rank_values", [])
+                     if not rank_values:
+                         raise TQLError("percentile_rank requires at least one value")
+                     aggs_dsl[alias] = {"percentile_ranks": {"field": field, "values": rank_values}}
+                 else:
+                     # Direct mapping
+                     aggs_dsl[alias] = {os_agg_type: {"field": field}}
+             else:
+                 raise TQLError(f"Unknown aggregation function: {func}")
+
+         return aggs_dsl
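Continuing the sketch above, a multi-value percentile spec (field name hypothetical) maps onto a single `percentiles` aggregation with stringified percents; auto-generated aliases follow the `function_field_index` pattern.

    aggs = [{"function": "percentile", "field": "bytes", "percentile_values": [50, 95, 99]}]
    translator._build_simple_aggregations(aggs)
    # {'percentile_bytes_0': {'percentiles': {'field': 'bytes', 'percents': ['50', '95', '99']}}}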
+
+     def _build_grouped_aggregations(
+         self,
+         aggregations: List[Dict[str, Any]],
+         group_by_fields: List[str],
+         field_mappings: Optional[Dict[str, str]] = None,
+     ) -> Dict[str, Any]:
+         """Build aggregations with grouping.
+
+         Args:
+             aggregations: List of aggregation specifications
+             group_by_fields: Fields to group by
+             field_mappings: Optional field mappings
+
+         Returns:
+             OpenSearch aggregations DSL with nested terms aggregations
+         """
+         # Start with the innermost aggregations
+         inner_aggs = self._build_simple_aggregations(aggregations, field_mappings)
+
+         # Check for top/bottom modifiers
+         order_field = None
+         order_direction = "desc"
+         size = 10
+
+         for agg in aggregations:
+             if "modifier" in agg:
+                 # Configure ordering based on modifier
+                 alias = agg.get("alias") or f"{agg['function']}_{agg['field']}_0"
+                 order_field = alias
+                 order_direction = "desc" if agg["modifier"] == "top" else "asc"
+                 size = agg.get("limit", 10)
+                 break
+
+         # Build nested terms aggregations for each group_by field
+         current_aggs = inner_aggs
+
+         # Process group_by fields in reverse order to build proper nesting
+         for field in reversed(group_by_fields):
+             terms_agg = {"terms": {"field": field, "size": size}}
+
+             # Add ordering if this is the outermost aggregation and we have order field
+             if field == group_by_fields[0] and order_field:
+                 # For nested aggregations, we need the full path
+                 order_path = order_field
+                 if len(group_by_fields) > 1:
+                     # Multi-level grouping requires special handling
+                     # OpenSearch doesn't support ordering by sub-aggregations in nested terms
+                     # We'll need to handle this in post-processing
+                     pass
+                 else:
+                     terms_agg["terms"]["order"] = {order_path: order_direction}
+
+             # Add sub-aggregations
+             if current_aggs:
+                 terms_agg["aggs"] = current_aggs
+
+             # Wrap for next level
+             current_aggs = {f"group_by_{field}": terms_agg}
+
+         return current_aggs
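Multi-field grouping nests one terms aggregation per field, with the metric aggregations innermost. Continuing the sketch, with hypothetical field names:

    aggs = [{"function": "count", "field": "*"}]
    translator._build_grouped_aggregations(aggs, ["src_ip", "dst_port"])
    # {'group_by_src_ip': {'terms': {'field': 'src_ip', 'size': 10},
    #   'aggs': {'group_by_dst_port': {'terms': {'field': 'dst_port', 'size': 10},
    #     'aggs': {'count_*_0': {'value_count': {'field': '_id'}}}}}}}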
+
+     def _validate_numeric_field(self, field: str, function: str, field_mappings: Dict[str, str]) -> None:
+         """Validate that a field is numeric for numeric aggregations.
+
+         Args:
+             field: Field name
+             function: Aggregation function
+             field_mappings: Field type mappings
+
+         Raises:
+             TQLError: If field is not numeric
+         """
+         field_type = field_mappings.get(field, "unknown")
+
+         # OpenSearch numeric types
+         numeric_types = {
+             "long",
+             "integer",
+             "short",
+             "byte",
+             "double",
+             "float",
+             "half_float",
+             "scaled_float",
+             "unsigned_long",
+         }
+
+         if field_type not in numeric_types and field_type != "unknown":
+             raise TQLError(
+                 f"Cannot perform {function}() on non-numeric field '{field}'. "
+                 f"Field '{field}' has type '{field_type}'. "
+                 f"Numeric aggregations require numeric types: {', '.join(sorted(numeric_types))}"
+             )
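With a field mapping supplied, a numeric function over a non-numeric field fails fast rather than producing a broken query. A sketch continuing from above (the mapping contents are hypothetical):

    from tql.exceptions import TQLError

    try:
        translator._validate_numeric_field("user.name", "avg", {"user.name": "keyword"})
    except TQLError as exc:
        print(exc)  # Cannot perform avg() on non-numeric field 'user.name'. ...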
+
+     def transform_response(self, response: Dict[str, Any], stats_ast: Dict[str, Any]) -> Dict[str, Any]:
+         """Transform OpenSearch aggregation response to TQL format.
+
+         Args:
+             response: OpenSearch aggregation response
+             stats_ast: Original stats AST for reference
+
+         Returns:
+             Transformed response in TQL format
+         """
+         aggregations = stats_ast.get("aggregations", [])
+         group_by_fields = stats_ast.get("group_by", [])
+
+         if not group_by_fields:
+             # Simple aggregation response
+             return self._transform_simple_response(response, aggregations)
+         else:
+             # Grouped aggregation response
+             return self._transform_grouped_response(response, aggregations, group_by_fields)
+
+     def _transform_simple_response(
+         self, response: Dict[str, Any], aggregations: List[Dict[str, Any]]
+     ) -> Dict[str, Any]:
+         """Transform simple aggregation response.
+
+         Args:
+             response: OpenSearch response
+             aggregations: Aggregation specifications
+
+         Returns:
+             Transformed response
+         """
+         aggs_data = response.get("aggregations", {})
+
+         if len(aggregations) == 1:
+             # Single aggregation
+             agg = aggregations[0]
+             alias = agg.get("alias") or f"{agg['function']}_{agg['field']}_0"
+
+             value = self._extract_aggregation_value(aggs_data.get(alias, {}), agg["function"])
+
+             return {
+                 "type": "simple_aggregation",
+                 "function": agg["function"],
+                 "field": agg["field"],
+                 "alias": agg.get("alias"),
+                 "value": value,
+             }
+         else:
+             # Multiple aggregations
+             results = {}
+             for i, agg in enumerate(aggregations):
+                 alias = agg.get("alias") or f"{agg['function']}_{agg['field']}_{i}"
+                 value = self._extract_aggregation_value(aggs_data.get(alias, {}), agg["function"])
+                 key = agg.get("alias") or f"{agg['function']}_{agg['field']}"
+                 results[key] = value
+
+             return {"type": "multiple_aggregations", "results": results}
+
+     def _transform_grouped_response(
+         self, response: Dict[str, Any], aggregations: List[Dict[str, Any]], group_by_fields: List[str]
+     ) -> Dict[str, Any]:
+         """Transform grouped aggregation response.
+
+         Args:
+             response: OpenSearch response
+             aggregations: Aggregation specifications
+             group_by_fields: Grouping fields
+
+         Returns:
+             Transformed response
+         """
+         # Navigate to the grouped results
+         aggs_data = response.get("aggregations", {})
+
+         # Get the outermost grouping
+         first_group_key = f"group_by_{group_by_fields[0]}"
+         grouped_data = aggs_data.get(first_group_key, {})
+
+         # Extract buckets
+         buckets = grouped_data.get("buckets", [])
+
+         # Transform buckets
+         results = []
+         for bucket in buckets:
+             result = self._transform_bucket(bucket, aggregations, group_by_fields, 0)
+             if result:
+                 results.append(result)
+
+         return {"type": "grouped_aggregation", "group_by": group_by_fields, "results": results}
+
+     def _transform_bucket(
+         self, bucket: Dict[str, Any], aggregations: List[Dict[str, Any]], group_by_fields: List[str], level: int
+     ) -> Optional[Dict[str, Any]]:
+         """Transform a single bucket from grouped aggregation.
+
+         Args:
+             bucket: OpenSearch bucket
+             aggregations: Aggregation specifications
+             group_by_fields: Grouping fields
+             level: Current nesting level
+
+         Returns:
+             Transformed bucket or None
+         """
+         result = {"key": {}, "doc_count": bucket.get("doc_count", 0)}
+
+         # Add current level key
+         if level < len(group_by_fields):
+             field = group_by_fields[level]
+             result["key"][field] = bucket.get("key")
+
+         # Check if there are more levels
+         if level + 1 < len(group_by_fields):
+             # Navigate to next level
+             next_field = group_by_fields[level + 1]
+             next_group_key = f"group_by_{next_field}"
+
+             if next_group_key in bucket:
+                 # This is a nested grouping, we need to aggregate the sub-buckets
+                 # For now, we'll just take the first sub-bucket
+                 # TODO: Handle proper multi-level grouping
+                 sub_buckets = bucket[next_group_key].get("buckets", [])
+                 if sub_buckets:
+                     sub_result = self._transform_bucket(sub_buckets[0], aggregations, group_by_fields, level + 1)
+                     if sub_result:
+                         # Merge keys
+                         result["key"].update(sub_result["key"])
+
+         # Extract aggregation values
+         if len(aggregations) == 1:
+             # Single aggregation
+             agg = aggregations[0]
+             alias = agg.get("alias") or f"{agg['function']}_{agg['field']}_0"
+             value = self._extract_aggregation_value(bucket.get(alias, {}), agg["function"])
+             agg_key = agg.get("alias") or agg["function"]
+             result[agg_key] = value
+         else:
+             # Multiple aggregations
+             result["aggregations"] = {}
+             for i, agg in enumerate(aggregations):
+                 alias = agg.get("alias") or f"{agg['function']}_{agg['field']}_{i}"
+                 value = self._extract_aggregation_value(bucket.get(alias, {}), agg["function"])
+                 agg_key = agg.get("alias") or f"{agg['function']}_{agg['field']}"
+                 result["aggregations"][agg_key] = value
+
+         return result
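When several aggregations share a bucket, their values land under an `aggregations` sub-dict keyed by alias or `function_field`. A sketch continuing from above, with a hand-built bucket:

    bucket = {
        "key": "10.0.0.5",
        "doc_count": 7,
        "sum_bytes_0": {"value": 9001.0},
        "max_bytes_1": {"value": 1500.0},
    }
    aggs = [{"function": "sum", "field": "bytes"}, {"function": "max", "field": "bytes"}]
    translator._transform_bucket(bucket, aggs, ["src_ip"], 0)
    # {'key': {'src_ip': '10.0.0.5'}, 'doc_count': 7,
    #  'aggregations': {'sum_bytes': 9001.0, 'max_bytes': 1500.0}}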
+
+     def _extract_aggregation_value(  # noqa: C901
+         self, agg_result: Dict[str, Any], function: str
+     ) -> Union[int, float, Dict[str, Any], None]:
+         """Extract value from OpenSearch aggregation result.
+
+         Args:
+             agg_result: OpenSearch aggregation result
+             function: TQL aggregation function
+
+         Returns:
+             Extracted value
+         """
+         if function == "count":
+             return agg_result.get("value", 0)
+         elif function == "unique_count":
+             return agg_result.get("value", 0)
+         elif function in ["sum", "min", "max", "average", "avg"]:
+             return agg_result.get("value")
+         elif function in ["median", "med"]:
+             # Extract from percentiles
+             values = agg_result.get("values", {})
+             return values.get("50.0") or values.get("50")
+         elif function in ["std", "standard_deviation"]:
+             # Extract from extended_stats
+             return agg_result.get("std_deviation")
+         elif function in ["percentile", "percentiles", "p", "pct"]:
+             # Extract percentile values
+             values = agg_result.get("values", {})
+             if len(values) == 1:
+                 # Single percentile - return just the value
+                 return list(values.values())[0]
+             else:
+                 # Multiple percentiles - return dict
+                 result = {}
+                 for k, v in values.items():
+                     # Convert "95.0" to "p95"
+                     percentile = int(float(k))
+                     result[f"p{percentile}"] = v
+                 return result
+         elif function in ["percentile_rank", "percentile_ranks", "pct_rank", "pct_ranks"]:
+             # Extract percentile rank values
+             values = agg_result.get("values", {})
+             if len(values) == 1:
+                 # Single rank - return just the value
+                 return list(values.values())[0]
+             else:
+                 # Multiple ranks - return dict
+                 result = {}
+                 for k, v in values.items():
+                     result[f"rank_{k}"] = v
+                 return result
+         else:
+             return None
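Finally, the value extractor converts OpenSearch's string percentile keys into `p<N>` labels and pulls `std_deviation` out of extended_stats. Two sketches against hand-built results, continuing from above:

    translator._extract_aggregation_value({"values": {"50.0": 10.0, "95.0": 42.0}}, "percentile")
    # {'p50': 10.0, 'p95': 42.0}

    translator._extract_aggregation_value({"std_deviation": 3.2, "avg": 10.0}, "std")
    # 3.2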