tellaro_query_language-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
  2. tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
  3. tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
  4. tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
  5. tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
  6. tql/__init__.py +47 -0
  7. tql/analyzer.py +385 -0
  8. tql/cache/__init__.py +7 -0
  9. tql/cache/base.py +25 -0
  10. tql/cache/memory.py +63 -0
  11. tql/cache/redis.py +68 -0
  12. tql/core.py +929 -0
  13. tql/core_components/README.md +92 -0
  14. tql/core_components/__init__.py +20 -0
  15. tql/core_components/file_operations.py +113 -0
  16. tql/core_components/opensearch_operations.py +869 -0
  17. tql/core_components/stats_operations.py +200 -0
  18. tql/core_components/validation_operations.py +599 -0
  19. tql/evaluator.py +379 -0
  20. tql/evaluator_components/README.md +131 -0
  21. tql/evaluator_components/__init__.py +17 -0
  22. tql/evaluator_components/field_access.py +176 -0
  23. tql/evaluator_components/special_expressions.py +296 -0
  24. tql/evaluator_components/value_comparison.py +315 -0
  25. tql/exceptions.py +160 -0
  26. tql/geoip_normalizer.py +233 -0
  27. tql/mutator_analyzer.py +830 -0
  28. tql/mutators/__init__.py +222 -0
  29. tql/mutators/base.py +78 -0
  30. tql/mutators/dns.py +316 -0
  31. tql/mutators/encoding.py +218 -0
  32. tql/mutators/geo.py +363 -0
  33. tql/mutators/list.py +212 -0
  34. tql/mutators/network.py +163 -0
  35. tql/mutators/security.py +225 -0
  36. tql/mutators/string.py +165 -0
  37. tql/opensearch.py +78 -0
  38. tql/opensearch_components/README.md +130 -0
  39. tql/opensearch_components/__init__.py +17 -0
  40. tql/opensearch_components/field_mapping.py +399 -0
  41. tql/opensearch_components/lucene_converter.py +305 -0
  42. tql/opensearch_components/query_converter.py +775 -0
  43. tql/opensearch_mappings.py +309 -0
  44. tql/opensearch_stats.py +451 -0
  45. tql/parser.py +1363 -0
  46. tql/parser_components/README.md +72 -0
  47. tql/parser_components/__init__.py +20 -0
  48. tql/parser_components/ast_builder.py +162 -0
  49. tql/parser_components/error_analyzer.py +101 -0
  50. tql/parser_components/field_extractor.py +112 -0
  51. tql/parser_components/grammar.py +473 -0
  52. tql/post_processor.py +737 -0
  53. tql/scripts.py +124 -0
  54. tql/stats_evaluator.py +444 -0
  55. tql/stats_transformer.py +184 -0
  56. tql/validators.py +110 -0
tql/scripts.py ADDED
@@ -0,0 +1,124 @@
"""Runs pytest, coverage, linters, and security checks."""

import subprocess  # nosec


def get_modified_files_as_set():
    """Get a set of modified files in the current git branch."""
    # Run the git command
    result = subprocess.run(  # nosec
        ["git", "diff", "--name-only", "HEAD"],
        capture_output=True,  # Redirect stdout/stderr
        text=True,  # Decode output to string
        check=False,
    )

    # Strip whitespace and split on newlines
    file_list = result.stdout.strip().split("\n")
    # Remove "pyproject.toml" from the list
    file_list = [f for f in file_list if f != "pyproject.toml"]

    # Convert to a set (filter out any empty strings that might occur)
    modified_files = {f for f in file_list if f}

    return modified_files
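
# Illustration (not from the package): if `git diff --name-only HEAD` prints
# "tql/core.py\npyproject.toml\n", the filtering above yields {"tql/core.py"};
# pyproject.toml and empty strings are dropped.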


def run_coverage():
    """
    Run coverage against all files in the `src` directory
    and output an XML report to `reports/coverage/coverage.xml`.
    """
    # 1. Run pytest with coverage, using `src` as the source
    subprocess.run(["coverage", "run", "--source=src", "-m", "pytest"], check=True)  # nosec

    # 2. Generate an XML coverage report in `reports/coverage/coverage.xml`
    subprocess.run(["coverage", "xml", "-o", "reports/coverage/coverage.xml"], check=True)  # nosec

    print("Coverage completed. XML report generated at reports/coverage/coverage.xml.")


def run_tests():
    """Run pytest against tests in the `tests` directory."""
    subprocess.run(["pytest", "tests"], check=True)  # nosec


def run_lint_all():
    """
    Run black, isort, flake8, and pylint on `src` and `tests`.
    """
    subprocess.run(  # nosec
        ["black", "src", "tests"],
        check=False,
    )
    subprocess.run(  # nosec
        ["isort", "src", "tests"],
        check=False,
    )
    subprocess.run(  # nosec
        ["flake8", "src", "tests"],
        check=False,
    )
    subprocess.run(  # nosec
        ["pylint", "src", "tests"],
        check=False,
    )


def run_lint():
    """
    Run black, isort, flake8, and pylint on files modified in git.
    """
    files = get_modified_files_as_set()
    files_list = list(files)

    if not files_list:
        print("No modified files detected.")
        return

    subprocess.run(["black", *files_list], check=False)  # nosec
    subprocess.run(["isort", *files_list], check=False)  # nosec
    subprocess.run(["flake8", *files_list], check=False)  # nosec
    subprocess.run(["pylint", *files_list], check=False)  # nosec


def run_badge():
    """Generate status badges using genbadge."""
    # 1. Run pytest with coverage and write a JUnit report for genbadge
    subprocess.run(  # nosec
        [
            "coverage",
            "run",
            "--source=src",
            "-m",
            "pytest",
            "--junit-xml=reports/junit/junit.xml",
        ],
        check=True,
    )

    # 2. Generate an XML coverage report in `reports/coverage/coverage.xml`
    subprocess.run(["coverage", "xml", "-o", "reports/coverage/coverage.xml"], check=True)  # nosec

    # 3. Generate a flake8 statistics report in `reports/flake8/flake8stats.txt`
    subprocess.run(  # nosec
        [
            "flake8",
            "--statistics",
            "--output-file=reports/flake8/flake8stats.txt",
            "--extend-exclude",
            ".github,reports,.venv,.vscode",
        ],
        check=False,
    )

    # 4. Generate badge for flake8
    subprocess.run(["genbadge", "flake8", "-o", "badge/flake8-badge.svg"], check=True)  # nosec

    # 5. Generate badge for coverage
    subprocess.run(["genbadge", "coverage", "-o", "badge/coverage-badge.svg"], check=True)  # nosec

    # 6. Generate badge for tests
    subprocess.run(  # nosec
        ["genbadge", "tests", "-t", "90", "-o", "badge/test-badge.svg"],
        check=True,
    )
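
Since these are plain functions, the task runners can also be exercised directly from Python; a minimal sketch (assuming the package is installed and that git and pytest are available on PATH):

from tql.scripts import get_modified_files_as_set, run_tests

print(sorted(get_modified_files_as_set()))  # paths changed since HEAD
run_tests()  # equivalent to running `pytest tests`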
tql/stats_evaluator.py ADDED
@@ -0,0 +1,444 @@
"""Stats evaluator for TQL aggregation queries.

This module provides the TQLStatsEvaluator class for executing statistical
aggregation queries against data records in memory.
"""

import statistics
from collections import defaultdict
from typing import Any, Dict, List, Optional, Union

from .exceptions import TQLError


class TQLStatsEvaluator:
    """Evaluates TQL stats queries against data records.

    This class handles statistical aggregations, grouping, and produces
    results in a UI-friendly format.
    """

    # Aggregation functions that require numeric fields
    NUMERIC_AGGREGATIONS = {
        "sum",
        "min",
        "max",
        "average",
        "avg",
        "median",
        "med",
        "std",
        "standard_deviation",
        "percentile",
        "percentiles",
        "p",
        "pct",
        "percentile_rank",
        "percentile_ranks",
        "pct_rank",
        "pct_ranks",
    }

    # Aggregation functions that work with any field type
    ANY_TYPE_AGGREGATIONS = {"count", "unique_count"}

    # Numeric types supported by OpenSearch
    NUMERIC_TYPES = {
        "long",
        "integer",
        "short",
        "byte",
        "double",
        "float",
        "half_float",
        "scaled_float",
        "unsigned_long",
    }

    def __init__(self):
        """Initialize the stats evaluator."""

    def evaluate_stats(
        self, records: List[Dict[str, Any]], stats_ast: Dict[str, Any], field_mappings: Optional[Dict[str, str]] = None
    ) -> Dict[str, Any]:
        """Evaluate stats query against records.

        Args:
            records: List of records to aggregate
            stats_ast: Stats AST from parser
            field_mappings: Optional field type mappings

        Returns:
            Aggregated results in UI-friendly format
        """
        aggregations = stats_ast.get("aggregations", [])
        group_by_fields = stats_ast.get("group_by", [])

        # Validate aggregation types against field mappings if provided
        if field_mappings:
            self._validate_aggregations(aggregations, field_mappings)

        if not group_by_fields:
            # Simple aggregation without grouping
            return self._simple_aggregation(records, aggregations)
        else:
            # Grouped aggregation
            return self._grouped_aggregation(records, aggregations, group_by_fields)
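
    # Illustrative usage (not part of the original file). Assuming the parser
    # produced a stats AST for a query along the lines of `count(*) by status`:
    #
    #   evaluator = TQLStatsEvaluator()
    #   records = [{"status": "ok"}, {"status": "ok"}, {"status": "err"}]
    #   ast = {"aggregations": [{"function": "count", "field": "*"}],
    #          "group_by": ["status"]}
    #   evaluator.evaluate_stats(records, ast)
    #   # -> {"type": "grouped_aggregation", "group_by": ["status"],
    #   #     "results": [{"key": {"status": "ok"}, "doc_count": 2, "count": 2},
    #   #                 {"key": {"status": "err"}, "doc_count": 1, "count": 1}]}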

    def _validate_aggregations(self, aggregations: List[Dict[str, Any]], field_mappings: Dict[str, str]) -> None:
        """Validate that aggregation functions are compatible with field types.

        Args:
            aggregations: List of aggregation specifications
            field_mappings: Field type mappings

        Raises:
            TQLError: If aggregation is incompatible with field type
        """
        for agg in aggregations:
            func = agg["function"]
            field = agg["field"]

            # Skip validation for count(*)
            if field == "*":
                continue

            # Check if function requires numeric type
            if func in self.NUMERIC_AGGREGATIONS:
                field_type = field_mappings.get(field, "unknown")

                if field_type not in self.NUMERIC_TYPES and field_type != "unknown":
                    raise TQLError(
                        f"Cannot perform {func}() on non-numeric field '{field}' (type: {field_type}). "
                        f"Use count() or unique_count() for non-numeric fields, or ensure '{field}' "
                        f"is mapped as a numeric type."
                    )
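
    # Illustration (not from the package): with field_mappings ==
    # {"user.name": "keyword"}, a spec {"function": "avg", "field": "user.name"}
    # raises TQLError here, since avg() is in NUMERIC_AGGREGATIONS but
    # "keyword" is not a numeric type.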

    def _simple_aggregation(self, records: List[Dict[str, Any]], aggregations: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Perform aggregation without grouping.

        Args:
            records: Records to aggregate
            aggregations: Aggregation specifications

        Returns:
            Aggregated results
        """
        if len(aggregations) == 1:
            # Single aggregation
            agg = aggregations[0]
            value = self._calculate_aggregation(records, agg)

            return {
                "type": "simple_aggregation",
                "function": agg["function"],
                "field": agg["field"],
                "alias": agg.get("alias"),
                "value": value,
            }
        else:
            # Multiple aggregations
            results = {}
            for agg in aggregations:
                value = self._calculate_aggregation(records, agg)
                key = agg.get("alias") or f"{agg['function']}_{agg['field']}"
                results[key] = value

            return {"type": "multiple_aggregations", "results": results}
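
    # Illustration (assumed query syntax): without a group_by, a single
    # aggregation such as avg(bytes) comes back as
    #   {"type": "simple_aggregation", "function": "avg", "field": "bytes",
    #    "alias": None, "value": <mean of bytes>}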

    def _grouped_aggregation(
        self, records: List[Dict[str, Any]], aggregations: List[Dict[str, Any]], group_by_fields: List[str]
    ) -> Dict[str, Any]:
        """Perform aggregation with grouping.

        Args:
            records: Records to aggregate
            aggregations: Aggregation specifications
            group_by_fields: Fields to group by

        Returns:
            Grouped aggregation results
        """
        # Group records
        groups = defaultdict(list)
        for record in records:
            # Build group key
            key_parts = []
            for field in group_by_fields:
                value = self._get_field_value(record, field)
                key_parts.append((field, value))
            key = tuple(key_parts)
            groups[key].append(record)

        # Calculate aggregations for each group
        results = []
        for key, group_records in groups.items():
            group_result: Dict[str, Any] = {"key": dict(key), "doc_count": len(group_records)}

            if len(aggregations) == 1:
                # Single aggregation
                agg = aggregations[0]
                value = self._calculate_aggregation(group_records, agg)
                agg_key = agg.get("alias") or agg["function"]
                group_result[agg_key] = value
            else:
                # Multiple aggregations
                group_result["aggregations"] = {}
                for agg in aggregations:
                    value = self._calculate_aggregation(group_records, agg)
                    agg_key = agg.get("alias") or f"{agg['function']}_{agg['field']}"
                    group_result["aggregations"][agg_key] = value

            results.append(group_result)

        # Apply modifiers (top/bottom)
        results = self._apply_modifiers(results, aggregations)

        return {"type": "grouped_aggregation", "group_by": group_by_fields, "results": results}

    def _calculate_aggregation(  # noqa: C901
        self, records: List[Dict[str, Any]], agg_spec: Dict[str, Any]
    ) -> Union[int, float, Dict[str, Any], None]:
        """Calculate a single aggregation value.

        Args:
            records: Records to aggregate
            agg_spec: Aggregation specification

        Returns:
            Aggregated value
        """
        func = agg_spec["function"]
        field = agg_spec["field"]

        # Handle count(*)
        if func == "count" and field == "*":
            return len(records)

        # Extract field values
        values = []
        for record in records:
            value = self._get_field_value(record, field)
            if value is not None:
                values.append(value)

        # Calculate aggregation
        if func == "count":
            return len(values)
        elif func == "unique_count":
            return len(set(values))
        elif func == "sum":
            return sum(self._to_numeric(v) for v in values) if values else 0
        elif func == "min":
            return min(self._to_numeric(v) for v in values) if values else None
        elif func == "max":
            return max(self._to_numeric(v) for v in values) if values else None
        elif func in ["average", "avg"]:
            if not values:
                return None
            numeric_values = [self._to_numeric(v) for v in values]
            return statistics.mean(numeric_values)
        elif func in ["median", "med"]:
            if not values:
                return None
            numeric_values = [self._to_numeric(v) for v in values]
            return statistics.median(numeric_values)
        elif func in ["std", "standard_deviation"]:
            if len(values) < 2:
                return None
            numeric_values = [self._to_numeric(v) for v in values]
            return statistics.stdev(numeric_values)
        elif func in ["percentile", "percentiles", "p", "pct"]:
            if not values:
                return None
            numeric_values = sorted([self._to_numeric(v) for v in values])
            percentile_values = agg_spec.get("percentile_values", [50])  # Default to median

            if len(percentile_values) == 1:
                # Single percentile
                return self._calculate_percentile(numeric_values, percentile_values[0])
            else:
                # Multiple percentiles - return dict
                result = {}
                for p in percentile_values:
                    result[f"p{int(p)}"] = self._calculate_percentile(numeric_values, p)
                return result
        elif func in ["percentile_rank", "percentile_ranks", "pct_rank", "pct_ranks"]:
            if not values:
                return None
            numeric_values = sorted([self._to_numeric(v) for v in values])
            rank_values = agg_spec.get("rank_values", [])

            if not rank_values:
                raise TQLError("percentile_rank requires at least one value")

            if len(rank_values) == 1:
                # Single rank value
                return self._calculate_percentile_rank(numeric_values, rank_values[0])
            else:
                # Multiple rank values - return dict
                result = {}
                for v in rank_values:
                    result[f"rank_{v}"] = self._calculate_percentile_rank(numeric_values, v)
                return result
        else:
            raise TQLError(f"Unsupported aggregation function: {func}")
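
    # Illustration (not from the package): a spec with
    # agg_spec["percentile_values"] == [50, 95] returns {"p50": ..., "p95": ...},
    # while a single percentile returns the bare number.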

    def _apply_modifiers(
        self, results: List[Dict[str, Any]], aggregations: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Apply top/bottom modifiers to results.

        Args:
            results: Aggregation results
            aggregations: Aggregation specifications with modifiers

        Returns:
            Modified results
        """
        # Check if any aggregation has modifiers
        for agg in aggregations:
            if "modifier" in agg:
                # Sort results based on the aggregation value
                agg_key = agg.get("alias") or agg["function"]

                # Get the value from the result
                def get_sort_value(result, key=agg_key):
                    if "aggregations" in result:
                        return result["aggregations"].get(key, 0)
                    else:
                        return result.get(key, 0)

                # Sort descending for "top", ascending for "bottom"
                reverse = agg["modifier"] == "top"
                results = sorted(results, key=get_sort_value, reverse=reverse)

                # Limit the number of groups returned
                limit = agg.get("limit", 10)
                results = results[:limit]

                break  # Only apply first modifier found

        return results
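
    # Illustration: a spec such as
    # {"function": "count", "field": "*", "modifier": "top", "limit": 5}
    # sorts the groups by their count, descending, and keeps the first five;
    # "bottom" sorts ascending, and the limit defaults to 10.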

    def _get_field_value(self, record: Dict[str, Any], field_path: str) -> Any:
        """Get a field value from a record, supporting nested fields.

        Args:
            record: The record dictionary
            field_path: Dot-separated field path

        Returns:
            The field value or None if not found
        """
        parts = field_path.split(".")
        current = record

        for part in parts:
            if isinstance(current, dict) and part in current:
                current = current[part]
            else:
                return None

        return current
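
    # Illustration: _get_field_value({"src": {"ip": "10.0.0.1"}}, "src.ip")
    # returns "10.0.0.1"; any missing path segment yields None.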

    def _to_numeric(self, value: Any) -> Union[int, float]:
        """Convert value to numeric type.

        Args:
            value: Value to convert

        Returns:
            Numeric value

        Raises:
            TQLError: If value cannot be converted
        """
        if isinstance(value, (int, float)):
            return value

        if isinstance(value, str):
            try:
                # Try int first for values without a decimal point
                if "." not in value:
                    return int(value)
                else:
                    return float(value)
            except ValueError as exc:
                raise TQLError(
                    f"Cannot convert '{value}' to numeric value. Ensure the field contains numeric data."
                ) from exc

        raise TQLError(
            f"Cannot convert {type(value).__name__} to numeric value. Ensure the field contains numeric data."
        )

    def _calculate_percentile(self, sorted_values: List[Union[int, float]], percentile: float) -> Optional[float]:
        """Calculate the percentile value for a sorted list of values.

        Args:
            sorted_values: Sorted list of numeric values
            percentile: Percentile to calculate (0-100)

        Returns:
            The percentile value
        """
        if not sorted_values:
            return None

        if percentile < 0 or percentile > 100:
            raise TQLError(f"Percentile must be between 0 and 100, got {percentile}")

        n = len(sorted_values)
        if n == 1:
            return sorted_values[0]

        # Calculate the position using linear interpolation
        pos = (n - 1) * (percentile / 100.0)
        lower_idx = int(pos)
        upper_idx = min(lower_idx + 1, n - 1)

        if lower_idx == upper_idx:
            return sorted_values[lower_idx]

        # Linear interpolation between two values
        lower_value = sorted_values[lower_idx]
        upper_value = sorted_values[upper_idx]
        fraction = pos - lower_idx

        return lower_value + fraction * (upper_value - lower_value)
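
    # Worked example (illustrative): for sorted_values == [10, 20, 30, 40] and
    # percentile == 25, pos = (4 - 1) * 0.25 = 0.75, so the result interpolates
    # between 10 and 20: 10 + 0.75 * (20 - 10) == 17.5.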

    def _calculate_percentile_rank(self, sorted_values: List[Union[int, float]], value: float) -> Optional[float]:
        """Calculate the percentile rank of a value within a sorted list.

        Args:
            sorted_values: Sorted list of numeric values
            value: Value to find percentile rank for

        Returns:
            The percentile rank (0-100)
        """
        if not sorted_values:
            return None

        n = len(sorted_values)

        # Count how many values are less than, and how many equal to, the target
        count_less = 0
        count_equal = 0

        for v in sorted_values:
            if v < value:
                count_less += 1
            elif v == value:
                count_equal += 1

        # Calculate percentile rank
        # If value is in the list, use midpoint of its range
        if count_equal > 0:
            rank = (count_less + count_equal / 2.0) / n * 100
        else:
            # Value not in list, interpolate
            rank = count_less / n * 100

        return round(rank, 2)
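
    # Worked example (illustrative): for sorted_values == [10, 20, 20, 30] and
    # value == 20, count_less == 1 and count_equal == 2, so the rank is
    # (1 + 2 / 2.0) / 4 * 100 == 50.0.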