tellaro-query-language 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
- tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
- tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
- tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
- tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
- tql/__init__.py +47 -0
- tql/analyzer.py +385 -0
- tql/cache/__init__.py +7 -0
- tql/cache/base.py +25 -0
- tql/cache/memory.py +63 -0
- tql/cache/redis.py +68 -0
- tql/core.py +929 -0
- tql/core_components/README.md +92 -0
- tql/core_components/__init__.py +20 -0
- tql/core_components/file_operations.py +113 -0
- tql/core_components/opensearch_operations.py +869 -0
- tql/core_components/stats_operations.py +200 -0
- tql/core_components/validation_operations.py +599 -0
- tql/evaluator.py +379 -0
- tql/evaluator_components/README.md +131 -0
- tql/evaluator_components/__init__.py +17 -0
- tql/evaluator_components/field_access.py +176 -0
- tql/evaluator_components/special_expressions.py +296 -0
- tql/evaluator_components/value_comparison.py +315 -0
- tql/exceptions.py +160 -0
- tql/geoip_normalizer.py +233 -0
- tql/mutator_analyzer.py +830 -0
- tql/mutators/__init__.py +222 -0
- tql/mutators/base.py +78 -0
- tql/mutators/dns.py +316 -0
- tql/mutators/encoding.py +218 -0
- tql/mutators/geo.py +363 -0
- tql/mutators/list.py +212 -0
- tql/mutators/network.py +163 -0
- tql/mutators/security.py +225 -0
- tql/mutators/string.py +165 -0
- tql/opensearch.py +78 -0
- tql/opensearch_components/README.md +130 -0
- tql/opensearch_components/__init__.py +17 -0
- tql/opensearch_components/field_mapping.py +399 -0
- tql/opensearch_components/lucene_converter.py +305 -0
- tql/opensearch_components/query_converter.py +775 -0
- tql/opensearch_mappings.py +309 -0
- tql/opensearch_stats.py +451 -0
- tql/parser.py +1363 -0
- tql/parser_components/README.md +72 -0
- tql/parser_components/__init__.py +20 -0
- tql/parser_components/ast_builder.py +162 -0
- tql/parser_components/error_analyzer.py +101 -0
- tql/parser_components/field_extractor.py +112 -0
- tql/parser_components/grammar.py +473 -0
- tql/post_processor.py +737 -0
- tql/scripts.py +124 -0
- tql/stats_evaluator.py +444 -0
- tql/stats_transformer.py +184 -0
- tql/validators.py +110 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""Statistics operations for TQL.
|
|
2
|
+
|
|
3
|
+
This module handles statistical aggregations and analysis for TQL queries.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Any, Dict, List, Optional, Union
|
|
7
|
+
|
|
8
|
+
from ..exceptions import TQLParseError, TQLValueError
|
|
9
|
+
from ..parser import TQLParser
|
|
10
|
+
from ..stats_evaluator import TQLStatsEvaluator
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class StatsOperations:
    """Handles statistics operations for TQL.

    Provides three entry points: ``stats`` for standalone aggregation queries,
    ``query_stats`` for combined filter-plus-aggregation queries, and
    ``analyze_stats_query`` for static analysis of a stats query without
    executing it.
    """

    def __init__(self, parser: "TQLParser", field_mappings: Optional[Dict[str, Any]] = None):
        """Initialize statistics operations.

        Args:
            parser: TQL parser instance used to parse query strings.
            field_mappings: Field mappings for evaluation; defaults to an
                empty mapping when not provided.
        """
        self.parser = parser
        self.stats_evaluator = TQLStatsEvaluator()
        self.field_mappings = field_mappings or {}

    @staticmethod
    def _load_records(data: Union[List[Dict], str]) -> List[Dict]:
        """Return the records in *data*, loading them from disk when given a path.

        Shared by ``stats`` and ``query_stats`` (previously duplicated inline).

        Args:
            data: Either a list of record dicts or a file path string.

        Returns:
            List of record dictionaries.
        """
        if isinstance(data, str):
            # Local import to avoid a circular dependency at module load time.
            from .file_operations import FileOperations

            return FileOperations().load_file(data)
        return data

    def stats(self, data: Union[List[Dict], str], stats_query: str) -> Dict[str, Any]:
        """Execute a statistics query on data.

        Args:
            data: List of records or file path
            stats_query: Stats query string (e.g., "| stats count() by status")

        Returns:
            Dictionary containing aggregation results

        Raises:
            TQLParseError: If query parsing fails
            TQLValueError: If query is invalid
        """
        # Ensure the query starts with | stats
        if not stats_query.strip().startswith("| stats"):
            stats_query = "| stats " + stats_query.strip()

        # Parse the stats expression
        try:
            parsed = self.parser.parse(stats_query)
        except TQLParseError as e:
            # Chain the original error so the full parse context is preserved.
            raise TQLParseError(f"Invalid stats query: {str(e)}") from e

        # Verify it's a stats expression
        if parsed.get("type") != "stats_expr":
            raise TQLValueError("Query must be a stats expression starting with '| stats'")

        records = self._load_records(data)

        # Execute the stats query
        return self.stats_evaluator.evaluate_stats(records, parsed)

    def query_stats(self, data: Union[List[Dict], str], query: str) -> Dict[str, Any]:
        """Execute a TQL query with stats aggregation.

        This combines filtering and statistical aggregation in one query.

        Args:
            data: List of records or file path
            query: Combined query string (e.g., "status = 'active' | stats count() by type")

        Returns:
            Dictionary containing aggregation results

        Raises:
            TQLParseError: If query parsing fails
            TQLValueError: If the query lacks either a filter or a stats part
        """
        # Parse the combined query
        parsed = self.parser.parse(query)

        # Check if it's a query with stats
        if parsed.get("type") != "query_with_stats":
            raise TQLValueError("Query must contain both filter and stats parts separated by |")

        records = self._load_records(data)

        # First apply the filter.
        # Local import to avoid a circular dependency at module load time.
        from ..evaluator import TQLEvaluator

        evaluator = TQLEvaluator()
        filter_ast = parsed["filter"]
        filtered_records = [
            record for record in records if evaluator._evaluate_node(filter_ast, record, self.field_mappings)
        ]

        # Then apply stats
        return self.stats_evaluator.evaluate_stats(filtered_records, parsed["stats"])

    def analyze_stats_query(self, query: str) -> Dict[str, Any]:  # noqa: C901
        """Analyze a stats query for performance and correctness.

        Args:
            query: Stats query string

        Returns:
            Analysis results including AST and any warnings. On failure the
            dict has ``valid: False`` plus ``error`` and ``type`` keys.
        """
        # Parse the query, normalizing a bare stats expression first.
        try:
            if not query.strip().startswith("| stats") and "|" not in query:
                query = "| stats " + query.strip()

            ast = self.parser.parse(query)
        except TQLParseError as e:
            return {"valid": False, "error": str(e), "type": "parse_error"}

        # Determine query type
        if ast.get("type") == "stats_expr":
            query_type = "stats_only"
            stats_ast = ast
            filter_ast = None
        elif ast.get("type") == "query_with_stats":
            query_type = "filter_and_stats"
            stats_ast = ast["stats"]
            filter_ast = ast["filter"]
        else:
            return {"valid": False, "error": "Query must be a stats expression", "type": "invalid_query_type"}

        # Analyze the stats portion
        aggregations = stats_ast.get("aggregations", [])
        group_by = stats_ast.get("group_by", [])

        warnings: List[str] = []
        suggestions: List[str] = []

        # Check for common issues
        if not aggregations:
            warnings.append("No aggregation functions specified")
            suggestions.append("Add aggregation functions like count(), sum(field), avg(field)")

        # Check for duplicate aggregations without aliases (set gives O(1) membership).
        seen_aggs: set = set()
        for agg in aggregations:
            if not agg.get("alias"):
                key = f"{agg['function']}({agg['field']})"
                if key in seen_aggs:
                    warnings.append(f"Duplicate aggregation without alias: {key}")
                    suggestions.append(f"Use aliases to distinguish: {key} as alias1, {key} as alias2")
                seen_aggs.add(key)

        # Build analysis result
        result = {
            "valid": True,
            "type": query_type,
            "query": query,
            "ast": ast,
            "aggregations": aggregations,
            "group_by": group_by,
            "warnings": warnings,
            "suggestions": suggestions,
        }

        if filter_ast:
            result["filter"] = self._analyze_filter(filter_ast)

        return result

    def _analyze_filter(self, ast: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze the filter portion of a query.

        Walks the filter AST collecting every referenced field and operator.
        Only ``comparison`` and ``logical_op`` nodes are recognized; other
        node types are ignored.

        Args:
            ast: Filter AST node (dict).

        Returns:
            Dict with de-duplicated ``fields`` and ``operators`` lists.
        """
        fields = []
        operators = []

        def traverse(node):
            # Recursively visit comparison leaves and logical-op branches.
            if isinstance(node, dict):
                node_type = node.get("type")
                if node_type == "comparison":
                    fields.append(node.get("field"))
                    operators.append(node.get("operator"))
                elif node_type == "logical_op":
                    operators.append(node.get("operator"))
                    traverse(node.get("left"))
                    traverse(node.get("right"))

        traverse(ast)

        return {"fields": list(set(fields)), "operators": list(set(operators))}
|