vtlengine-1.0-py3-none-any.whl → vtlengine-1.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +153 -100
- vtlengine/API/__init__.py +109 -67
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +8 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/lexer.py +732 -142
- vtlengine/AST/Grammar/parser.py +2188 -826
- vtlengine/AST/Grammar/tokens.py +128 -128
- vtlengine/AST/VtlVisitor.py +7 -4
- vtlengine/AST/__init__.py +22 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +194 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +52 -27
- vtlengine/Exceptions/messages.py +134 -62
- vtlengine/Interpreter/__init__.py +781 -487
- vtlengine/Model/__init__.py +165 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +115 -59
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +149 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +89 -44
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +334 -216
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +195 -40
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +77 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +46 -37
- vtlengine-1.0.1.dist-info/METADATA +236 -0
- vtlengine-1.0.1.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
vtlengine/Interpreter/__init__.py

@@ -4,10 +4,16 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
 import vtlengine.AST as AST
+import vtlengine.Exceptions
 import vtlengine.Operators as Operators
 import pandas as pd
-from vtlengine.DataTypes import
-
+from vtlengine.DataTypes import (
+    BASIC_TYPES,
+    check_unary_implicit_promotion,
+    ScalarType,
+    Boolean,
+    SCALAR_TYPES_CLASS_REVERSE,
+)
 from vtlengine.Operators.Aggregation import extract_grouping_identifiers
 from vtlengine.Operators.Assignment import Assignment
 from vtlengine.Operators.CastOperator import Cast
@@ -19,22 +25,66 @@ from vtlengine.Operators.Numeric import Round, Trunc
 from vtlengine.Operators.String import Instr, Replace, Substr
 from vtlengine.Operators.Time import Fill_time_series, Time_Aggregation, Current_Date
 from vtlengine.Operators.Validation import Check, Check_Datapoint, Check_Hierarchy
-from vtlengine.Utils import
-
-
-
+from vtlengine.Utils import (
+    AGGREGATION_MAPPING,
+    ANALYTIC_MAPPING,
+    BINARY_MAPPING,
+    JOIN_MAPPING,
+    REGULAR_AGGREGATION_MAPPING,
+    ROLE_SETTER_MAPPING,
+    SET_MAPPING,
+    UNARY_MAPPING,
+    THEN_ELSE,
+    HR_UNARY_MAPPING,
+    HR_COMP_MAPPING,
+    HR_NUM_BINARY_MAPPING,
+)
+from vtlengine.files.output import save_datapoints
+from vtlengine.files.output._time_period_representation import TimePeriodRepresentation
 from vtlengine.files.parser import load_datapoints, _fill_dataset_empty_data
 
 from vtlengine.AST.ASTTemplate import ASTTemplate
-from vtlengine.AST.DAG import HRDAGAnalyzer
-from vtlengine.AST.
-
-
-
+from vtlengine.AST.DAG import HRDAGAnalyzer
+from vtlengine.AST.DAG._words import GLOBAL, DELETE, INSERT
+from vtlengine.AST.Grammar.tokens import (
+    AGGREGATE,
+    ALL,
+    APPLY,
+    AS,
+    BETWEEN,
+    CHECK_DATAPOINT,
+    DROP,
+    EXISTS_IN,
+    EXTERNAL,
+    FILTER,
+    HAVING,
+    INSTR,
+    KEEP,
+    MEMBERSHIP,
+    REPLACE,
+    ROUND,
+    SUBSTR,
+    TRUNC,
+    WHEN,
+    FILL_TIME_SERIES,
+    CAST,
+    CHECK_HIERARCHY,
+    HIERARCHY,
+    EQ,
+    CURRENT_DATE,
+    CALC,
+)
 from vtlengine.Exceptions import SemanticError
-from vtlengine.Model import
-
-
+from vtlengine.Model import (
+    DataComponent,
+    Dataset,
+    ExternalRoutine,
+    Role,
+    Scalar,
+    ScalarSet,
+    Component,
+    ValueDomain,
+)
 
 
 # noinspection PyTypeChecker
@@ -47,13 +97,13 @@ class InterpreterAnalyzer(ASTTemplate):
     # Analysis mode
     only_semantic: bool = False
     # Memory efficient
-    ds_analysis: Optional[
+    ds_analysis: Optional[Dict[str, Any]] = None
     datapoints_paths: Optional[Dict[str, Path]] = None
     output_path: Optional[Union[str, Path]] = None
     # Time Period Representation
     time_period_representation: Optional[TimePeriodRepresentation] = None
     # Flags to change behavior
-    nested_if = False
+    nested_if: Union[str, bool] = False
     is_from_assignment: bool = False
     is_from_component_assignment: bool = False
     is_from_regular_aggregation: bool = False
@@ -70,50 +120,55 @@ class InterpreterAnalyzer(ASTTemplate):
     regular_aggregation_dataset: Optional[Dataset] = None
     aggregation_grouping: Optional[List[str]] = None
     aggregation_dataset: Optional[Dataset] = None
-    then_condition_dataset: Optional[List[
-    else_condition_dataset: Optional[List[
+    then_condition_dataset: Optional[List[Any]] = None
+    else_condition_dataset: Optional[List[Any]] = None
     ruleset_dataset: Optional[Dataset] = None
     rule_data: Optional[pd.DataFrame] = None
-    ruleset_signature: Dict[str, str] = None
-    udo_params: List[Dict[str, Any]] = None
+    ruleset_signature: Optional[Dict[str, str]] = None
+    udo_params: Optional[List[Dict[str, Any]]] = None
     hr_agg_rules_computed: Optional[Dict[str, pd.DataFrame]] = None
     ruleset_mode: Optional[str] = None
     hr_input: Optional[str] = None
     hr_partial_is_valid: Optional[List[bool]] = None
    hr_condition: Optional[Dict[str, str]] = None
     # DL
-    dprs: Dict[str, Dict[str, Any]] = None
-    udos: Dict[str, Dict[str, Any]] = None
-    hrs: Dict[str, Dict[str, Any]] = None
+    dprs: Optional[Dict[str, Optional[Dict[str, Any]]]] = None
+    udos: Optional[Dict[str, Optional[Dict[str, Any]]]] = None
+    hrs: Optional[Dict[str, Optional[Dict[str, Any]]]] = None
 
     # **********************************
     # *                                *
     # *        Memory efficient        *
     # *                                *
     # **********************************
-    def _load_datapoints_efficient(self, statement_num: int):
+    def _load_datapoints_efficient(self, statement_num: int) -> None:
         if self.datapoints_paths is None:
             return
+        if self.ds_analysis is None:
+            return
         if statement_num not in self.ds_analysis[INSERT]:
             return
         for ds_name in self.ds_analysis[INSERT][statement_num]:
             if ds_name in self.datapoints_paths:
-                self.datasets[ds_name].data = load_datapoints(
-
-
+                self.datasets[ds_name].data = load_datapoints(
+                    self.datasets[ds_name].components, ds_name, self.datapoints_paths[ds_name]
+                )
             elif ds_name in self.datasets and self.datasets[ds_name].data is None:
                 _fill_dataset_empty_data(self.datasets[ds_name])
 
-    def _save_datapoints_efficient(self, statement_num: int):
+    def _save_datapoints_efficient(self, statement_num: int) -> None:
         if self.output_path is None:
             # Keeping the data in memory if no output path is provided
             return
+        if self.ds_analysis is None:
+            return
         if statement_num not in self.ds_analysis[DELETE]:
             return
         for ds_name in self.ds_analysis[DELETE][statement_num]:
-            if (
-
-
+            if (
+                ds_name not in self.datasets
+                or not isinstance(self.datasets[ds_name], Dataset)
+                or self.datasets[ds_name].data is None
             ):
                 continue
             if ds_name in self.ds_analysis[GLOBAL]:
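The attribute changes in the hunk above follow the standard typing rule that a field defaulting to None must be declared Optional, which strict checkers such as mypy then force callers to narrow before use. A minimal, self-contained sketch of the idiom (hypothetical field name, not the real vtlengine class):

from dataclasses import dataclass
from typing import Dict, Optional

@dataclass
class AnalyzerState:
    # Under strict type checking, `ruleset_signature: Dict[str, str] = None`
    # is rejected: None is not a Dict. Optional[...] makes the default legal.
    ruleset_signature: Optional[Dict[str, str]] = None

    def record(self, key: str, value: str) -> None:
        # An explicit None check narrows Optional[Dict[...]] to Dict[...]
        if self.ruleset_signature is None:
            self.ruleset_signature = {}
        self.ruleset_signature[key] = value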
@@ -122,9 +177,9 @@ class InterpreterAnalyzer(ASTTemplate):
                 continue
 
             # Saving only datasets, no scalars
-            save_datapoints(
-
-
+            save_datapoints(
+                self.time_period_representation, self.datasets[ds_name], self.output_path
+            )
             self.datasets[ds_name].data = None
 
     # **********************************
@@ -142,6 +197,7 @@ class InterpreterAnalyzer(ASTTemplate):
         results = {}
         for child in node.children:
             if isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
+                vtlengine.Exceptions.dataset_output = child.left.value  # type: ignore[attr-defined]
             self._load_datapoints_efficient(statement_num)
             if not isinstance(child, (AST.HRuleset, AST.DPRuleset, AST.Operator)):
                 if not isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
@@ -157,8 +213,11 @@ class InterpreterAnalyzer(ASTTemplate):
 
             if result is None:
                 continue
+
+            # Removing output dataset
+            vtlengine.Exceptions.dataset_output = None
             # Save results
-            self.datasets[result.name] = result
+            self.datasets[result.name] = copy(result)
             results[result.name] = result
             self._save_datapoints_efficient(statement_num)
             statement_num += 1
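The change from storing `result` to storing `copy(result)` in the hunk above decouples the interpreter's internal dataset registry from the object handed back in `results`. A small illustration of the shallow-copy behavior involved (toy class, not the vtlengine Dataset API):

from copy import copy

class Box:
    def __init__(self, name, data):
        self.name = name
        self.data = data

result = Box("DS_r", data=[1, 2, 3])
registry = {"DS_r": copy(result)}  # a new wrapper object around the same data
registry["DS_r"].data = None       # rebinding an attribute on the copy...
print(result.data)                 # ...leaves the original intact: [1, 2, 3]
# Note: copy() is shallow, so in-place mutation of a shared container
# (e.g. registry["DS_r"].data.append(4) before the rebind) would still
# be visible through both objects.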
@@ -190,14 +249,14 @@ class InterpreterAnalyzer(ASTTemplate):
         if len(param_info) > 1:
             previous_default = param_info[0]
             for i in [1, len(param_info) - 1]:
-                if previous_default
+                if previous_default and not param_info[i]:
                     raise SemanticError("1-3-12")
                 previous_default = param_info[i]
 
         self.udos[node.op] = {
-
-
-
+            "params": param_info,
+            "expression": node.expression,
+            "output": node.output_type,
         }
 
     def visit_DPRuleset(self, node: AST.DPRuleset) -> None:
@@ -209,27 +268,35 @@ class InterpreterAnalyzer(ASTTemplate):
             raise SemanticError("1-4-1-7", type="Datapoint Ruleset", name=node.name)
         if len(rule_names) == 0:
             for i, rule in enumerate(node.rules):
-                rule.name = i + 1
+                rule.name = (i + 1).__str__()
 
         if len(rule_names) != len(set(rule_names)):
             not_unique = [name for name in rule_names if rule_names.count(name) > 1]
-            raise SemanticError(
-
-
+            raise SemanticError(
+                "1-4-1-5",
+                type="Datapoint Ruleset",
+                names=", ".join(not_unique),
+                ruleset_name=node.name,
+            )
 
         # Signature has the actual parameters names or aliases if provided
         signature_actual_names = {}
-
-
-
-
-
+        if not isinstance(node.params, AST.DefIdentifier):
+            for param in node.params:
+                if param.alias is not None:
+                    signature_actual_names[param.alias] = param.value
+                else:
+                    signature_actual_names[param.value] = param.value
 
         ruleset_data = {
-
-
-
-
+            "rules": node.rules,
+            "signature": signature_actual_names,
+            "params": (
+                [x.value for x in node.params]
+                if not isinstance(node.params, AST.DefIdentifier)
+                else []
+            ),
+            "signature_type": node.signature_type,
         }
 
         # Adding the ruleset to the dprs dictionary
@@ -252,7 +319,7 @@ class InterpreterAnalyzer(ASTTemplate):
             raise ValueError("All rules must have a name, or none of them")
         if len(rule_names) == 0:
             for i, rule in enumerate(node.rules):
-                rule.name = i + 1
+                rule.name = (i + 1).__str__()
 
         cond_comp = []
         if isinstance(node.element, list):
@@ -262,18 +329,21 @@ class InterpreterAnalyzer(ASTTemplate):
             signature_actual_name = node.element.value
 
         ruleset_data = {
-
-
+            "rules": node.rules,
+            "signature": signature_actual_name,
             "condition": cond_comp,
-
+            "node": node,
         }
 
         self.hrs[node.name] = ruleset_data
 
     # Execution Language
     def visit_Assignment(self, node: AST.Assignment) -> Any:
-        if
-
+        if (
+            self.is_from_join
+            and isinstance(node.left, AST.Identifier)
+            and node.left.kind == "ComponentID"
+        ):
             self.is_from_component_assignment = True
         self.is_from_assignment = True
         left_operand: str = self.visit(node.left)
@@ -285,32 +355,37 @@ class InterpreterAnalyzer(ASTTemplate):
     def visit_PersistentAssignment(self, node: AST.PersistentAssignment) -> Any:
         return self.visit_Assignment(node)
 
-    def visit_BinOp(self, node: AST.BinOp) ->
+    def visit_BinOp(self, node: AST.BinOp) -> Any:
 
         is_from_if = False
-        if
-
+        if (
+            not self.is_from_condition
+            and node.op != MEMBERSHIP
+            and self.if_stack is not None
+            and len(self.if_stack) > 0
+        ):
             is_from_if = self.is_from_if
             self.is_from_if = False
 
         if self.is_from_join and node.op in [MEMBERSHIP, AGGREGATE]:
-            if
-
-
-
-
-
-
-
-
+            if hasattr(node.left, "value") and hasattr(node.right, "value"):
+                if self.udo_params is not None and node.right.value in self.udo_params[-1]:
+                    comp_name = f"{node.left.value}#{self.udo_params[-1][node.right.value]}"
+                else:
+                    comp_name = f"{node.left.value}#{node.right.value}"
+                ast_var_id = AST.VarID(value=comp_name)
+                return self.visit(ast_var_id)
+        left_operand = self.visit(node.left)
+        right_operand = self.visit(node.right)
         if is_from_if:
             left_operand, right_operand = self.merge_then_else_datasets(left_operand, right_operand)
         if node.op == MEMBERSHIP:
-            if right_operand not in left_operand.components and
-                right_operand = right_operand.split(
+            if right_operand not in left_operand.components and "#" in right_operand:
+                right_operand = right_operand.split("#")[1]
             if self.is_from_component_assignment:
-                return BINARY_MAPPING[node.op].analyze(
-
+                return BINARY_MAPPING[node.op].analyze(
+                    left_operand, right_operand, self.is_from_component_assignment
+                )
             elif self.is_from_regular_aggregation:
                 raise SemanticError("1-1-6-6", dataset_name=left_operand, comp_name=right_operand)
             elif len(left_operand.get_identifiers()) == 0:
@@ -321,7 +396,11 @@ class InterpreterAnalyzer(ASTTemplate):
         operand = self.visit(node.operand)
         if node.op not in UNARY_MAPPING and node.op not in ROLE_SETTER_MAPPING:
             raise NotImplementedError
-        if
+        if (
+            self.is_from_regular_aggregation
+            and self.regular_aggregation_dataset is not None
+            and node.op in ROLE_SETTER_MAPPING
+        ):
             if self.regular_aggregation_dataset.data is None:
                 data_size = 0
             else:
@@ -335,9 +414,9 @@ class InterpreterAnalyzer(ASTTemplate):
             if node.operand is not None:
                 self.visit(node.operand)
             operand = self.aggregation_dataset
-        elif self.is_from_regular_aggregation:
+        elif self.is_from_regular_aggregation and self.regular_aggregation_dataset is not None:
             operand = self.regular_aggregation_dataset
-            if node.operand is not None:
+            if node.operand is not None and operand is not None:
                 op_comp: DataComponent = self.visit(node.operand)
                 comps_to_keep = {}
                 for comp_name, comp in self.regular_aggregation_dataset.components.items():
@@ -347,16 +426,14 @@ class InterpreterAnalyzer(ASTTemplate):
                     name=op_comp.name,
                     data_type=op_comp.data_type,
                     role=op_comp.role,
-                    nullable=op_comp.nullable
+                    nullable=op_comp.nullable,
                 )
                 if operand.data is not None:
                     data_to_keep = operand.data[operand.get_identifiers_names()]
                     data_to_keep[op_comp.name] = op_comp.data
                 else:
                     data_to_keep = None
-                operand = Dataset(name=operand.name,
-                                  components=comps_to_keep,
-                                  data=data_to_keep)
+                operand = Dataset(name=operand.name, components=comps_to_keep, data=data_to_keep)
         else:
             operand = self.visit(node.operand)
 
@@ -370,38 +447,42 @@ class InterpreterAnalyzer(ASTTemplate):
         if node.having_clause is not None and node.grouping is None:
             raise SemanticError("1-3-33")
 
-        groupings = []
+        groupings: Any = []
         having = None
         grouping_op = node.grouping_op
         if node.grouping is not None:
-            if grouping_op ==
+            if grouping_op == "group all":
                 if self.only_semantic:
                     data = None
                 else:
-                    data = operand.data
-                self.aggregation_dataset = Dataset(
-
-
+                    data = copy(operand.data)
+                self.aggregation_dataset = Dataset(
+                    name=operand.name, components=operand.components, data=data
+                )
             # For Component handling in operators like time_agg
             self.is_from_grouping = True
             for x in node.grouping:
                 groupings.append(self.visit(x))
             self.is_from_grouping = False
-            if grouping_op ==
+            if grouping_op == "group all":
                 comp_grouped = groupings[0]
-                if
+                if (
+                    operand.data is not None
+                    and comp_grouped.data is not None
+                    and len(comp_grouped.data) > 0
+                ):
                     operand.data[comp_grouped.name] = comp_grouped.data
                 groupings = [comp_grouped.name]
                 self.aggregation_dataset = None
         if node.having_clause is not None:
-            self.aggregation_dataset = Dataset(
-
-
-
+            self.aggregation_dataset = Dataset(
+                name=operand.name,
+                components=deepcopy(operand.components),
+                data=pd.DataFrame(columns=operand.get_components_names()),
+            )
             self.aggregation_grouping = extract_grouping_identifiers(
-                operand.get_identifiers_names(),
-
-                groupings)
+                operand.get_identifiers_names(), node.grouping_op, groupings
+            )
             self.is_from_having = True
             # Empty data analysis on having - we do not care about the result
             self.visit(node.having_clause)
@@ -409,17 +490,17 @@ class InterpreterAnalyzer(ASTTemplate):
             self.is_from_having = False
             self.aggregation_grouping = None
             self.aggregation_dataset = None
-            having = getattr(node.having_clause,
+            having = getattr(node.having_clause, "expr", "")
             having = self._format_having_expression_udo(having)
 
         elif self.is_from_having:
             groupings = self.aggregation_grouping
             # Setting here group by as we have already selected the identifiers we need
-            grouping_op =
+            grouping_op = "group by"
 
         return AGGREGATION_MAPPING[node.op].analyze(operand, grouping_op, groupings, having)
 
-    def _format_having_expression_udo(self, having: str):
+    def _format_having_expression_udo(self, having: str) -> str:
         if self.udo_params is None:
             return having
         for k, v in self.udo_params[-1].items():
@@ -438,8 +519,10 @@ class InterpreterAnalyzer(ASTTemplate):
             having = having.replace(old_param, new_param)
         return having
 
-    def visit_Analytic(self, node: AST.Analytic) ->
+    def visit_Analytic(self, node: AST.Analytic) -> Any:  # noqa: C901
         if self.is_from_regular_aggregation:
+            if self.regular_aggregation_dataset is None:
+                raise SemanticError("1-1-6-10")
             if node.operand is None:
                 operand = self.regular_aggregation_dataset
             else:
@@ -450,19 +533,20 @@ class InterpreterAnalyzer(ASTTemplate):
                     if name != operand_comp.name:
                         dataset_components.pop(name)
 
-                if self.only_semantic:
+                if self.only_semantic or self.regular_aggregation_dataset.data is None:
                     data = None
                 else:
-                    data = self.regular_aggregation_dataset.data[
-                        dataset_components.keys()]
+                    data = self.regular_aggregation_dataset.data[dataset_components.keys()]
 
-                operand = Dataset(
-
-
+                operand = Dataset(
+                    name=self.regular_aggregation_dataset.name,
+                    components=dataset_components,
+                    data=data,
+                )
 
         else:
-            operand
-        partitioning = []
+            operand = self.visit(node.operand)
+        partitioning: Any = []
         ordering = []
         if self.udo_params is not None:
             if node.partition_by is not None:
@@ -470,15 +554,23 @@ class InterpreterAnalyzer(ASTTemplate):
                     if comp_name in self.udo_params[-1]:
                         partitioning.append(self.udo_params[-1][comp_name])
                     else:
-                        raise SemanticError(
-
+                        raise SemanticError(
+                            "2-3-9",
+                            comp_type="Component",
+                            comp_name=comp_name,
+                            param="UDO parameters",
+                        )
             if node.order_by is not None:
                 for o in node.order_by:
                     if o.component in self.udo_params[-1]:
                         o.component = self.udo_params[-1][o.component]
                     else:
-                        raise SemanticError(
-
+                        raise SemanticError(
+                            "2-3-9",
+                            comp_type="Component",
+                            comp_name=o.component,
+                            param="UDO parameters",
+                        )
                 ordering = node.order_by
 
         else:
@@ -487,7 +579,9 @@ class InterpreterAnalyzer(ASTTemplate):
         if not isinstance(operand, Dataset):
             raise SemanticError("2-3-4", op=node.op, comp="dataset")
         if node.partition_by is None:
-            order_components =
+            order_components = (
+                [x.component for x in node.order_by] if node.order_by is not None else []
+            )
             partitioning = [x for x in operand.get_identifiers_names() if x not in order_components]
 
         params = []
@@ -498,16 +592,22 @@ class InterpreterAnalyzer(ASTTemplate):
             else:
                 params.append(param)
 
-        result = ANALYTIC_MAPPING[node.op].analyze(
-
-
-
-
+        result = ANALYTIC_MAPPING[node.op].analyze(
+            operand=operand,
+            partitioning=partitioning,
+            ordering=ordering,
+            window=node.window,
+            params=params,
+        )
         if not self.is_from_regular_aggregation:
             return result
 
         # Extracting the components we need (only identifiers)
-        id_columns =
+        id_columns = (
+            self.regular_aggregation_dataset.get_identifiers_names()
+            if (self.regular_aggregation_dataset is not None)
+            else None
+        )
 
         # # Extracting the component we need (only measure)
         measure_name = result.get_measures_names()[0]
@@ -515,21 +615,29 @@ class InterpreterAnalyzer(ASTTemplate):
         if self.only_semantic:
             data = None
         else:
-
-            self.regular_aggregation_dataset
-
-
-
-
-
-
-
-
-
-
-
-
+            if (
+                self.regular_aggregation_dataset is not None
+                and self.regular_aggregation_dataset.data is not None
+            ):
+                joined_result = pd.merge(
+                    self.regular_aggregation_dataset.data[id_columns],
+                    result.data,
+                    on=id_columns,
+                    how="inner",
+                )
+                data = joined_result[measure_name]
+            else:
+                data = None
+
+        return DataComponent(
+            name=measure_name,
+            data=data,
+            data_type=result.components[measure_name].data_type,
+            role=result.components[measure_name].role,
+            nullable=result.components[measure_name].nullable,
+        )
+
+    def visit_MulOp(self, node: AST.MulOp) -> None:
         """
         MulOp: (op, children)
 
@@ -583,9 +691,9 @@ class InterpreterAnalyzer(ASTTemplate):
             return Current_Date.analyze()
 
         else:
-            raise SemanticError("1-3-5", op_type=
+            raise SemanticError("1-3-5", op_type="MulOp", node_op=node.op)
 
-    def visit_VarID(self, node: AST.VarID) -> Any:
+    def visit_VarID(self, node: AST.VarID) -> Any:  # noqa: C901
         if self.is_from_assignment:
             return node.value
         # Having takes precedence as it is lower in the AST
@@ -595,83 +703,102 @@ class InterpreterAnalyzer(ASTTemplate):
                 return udo_element
             # If it is only the component or dataset name, we rename the node.value
             node.value = udo_element
-        if self.is_from_having or self.is_from_grouping:
+        if self.aggregation_dataset is not None and (self.is_from_having or self.is_from_grouping):
            if node.value not in self.aggregation_dataset.components:
-                raise SemanticError(
-
+                raise SemanticError(
+                    "1-1-1-10",
+                    op=None,
+                    comp_name=node.value,
+                    dataset_name=self.aggregation_dataset.name,
+                )
            if self.aggregation_dataset.data is None:
                data = None
            else:
                data = self.aggregation_dataset.data[node.value]
-            return DataComponent(
-
-
-
-
-
+            return DataComponent(
+                name=node.value,
+                data=data,
+                data_type=self.aggregation_dataset.components[node.value].data_type,
+                role=self.aggregation_dataset.components[node.value].role,
+                nullable=self.aggregation_dataset.components[node.value].nullable,
+            )
         if self.is_from_regular_aggregation:
             if self.is_from_join and node.value in self.datasets.keys():
                 return self.datasets[node.value]
-            if
-            if node.value in self.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            if self.regular_aggregation_dataset is not None:
+                if node.value in self.datasets and isinstance(self.datasets[node.value], Scalar):
+                    if node.value in self.regular_aggregation_dataset.components:
+                        raise SemanticError("1-1-6-11", comp_name=node.value)
+                    return self.datasets[node.value]
+                if self.regular_aggregation_dataset.data is not None:
+                    if (
+                        self.is_from_join
+                        and node.value
+                        not in self.regular_aggregation_dataset.get_components_names()
+                    ):
+                        is_partial_present = 0
+                        found_comp = None
+                        for comp_name in self.regular_aggregation_dataset.get_components_names():
+                            if "#" in comp_name and comp_name.split("#")[1] == node.value:
+                                is_partial_present += 1
+                                found_comp = comp_name
+                            elif "#" in node.value and node.value.split("#")[1] == comp_name:
+                                is_partial_present += 1
+                                found_comp = comp_name
+                        if is_partial_present == 0:
+                            raise SemanticError(
+                                "1-1-1-10",
+                                comp_name=node.value,
+                                dataset_name=self.regular_aggregation_dataset.name,
+                            )
+                        elif is_partial_present == 2:
+                            raise SemanticError("1-1-13-9", comp_name=node.value)
+                        node.value = found_comp
+                    if node.value not in self.regular_aggregation_dataset.components:
+                        raise SemanticError(
+                            "1-1-1-10",
+                            comp_name=node.value,
+                            dataset_name=self.regular_aggregation_dataset.name,
+                        )
+                    data = self.regular_aggregation_dataset.data[node.value]
+                else:
+                    data = None
+                return DataComponent(
+                    name=node.value,
+                    data=data,
+                    data_type=self.regular_aggregation_dataset.components[node.value].data_type,
+                    role=self.regular_aggregation_dataset.components[node.value].role,
+                    nullable=self.regular_aggregation_dataset.components[node.value].nullable,
+                )
+        if (
+            self.is_from_rule
+            and self.ruleset_dataset is not None
+            and self.ruleset_signature is not None
+        ):
             if node.value not in self.ruleset_signature:
                 raise SemanticError("1-1-10-7", comp_name=node.value)
             comp_name = self.ruleset_signature[node.value]
             if comp_name not in self.ruleset_dataset.components:
-                raise SemanticError(
-
+                raise SemanticError(
+                    "1-1-1-10", comp_name=node.value, dataset_name=self.ruleset_dataset.name
+                )
             if self.rule_data is None:
                 data = None
             else:
                 data = self.rule_data[comp_name]
-            return DataComponent(
-
-
-
-
+            return DataComponent(
+                name=comp_name,
+                data=data,
+                data_type=self.ruleset_dataset.components[comp_name].data_type,
+                role=self.ruleset_dataset.components[comp_name].role,
+                nullable=self.ruleset_dataset.components[comp_name].nullable,
+            )
         if node.value not in self.datasets:
             raise SemanticError("2-3-6", dataset_name=node.value)
         return self.datasets[node.value]
 
     def visit_Collection(self, node: AST.Collection) -> Any:
-        if node.kind ==
+        if node.kind == "Set":
             elements = []
             duplicates = []
             for child in node.children:
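Several branches in the hunk above resolve partially qualified component names of the form dataset#component against a dataset's component list, failing on zero matches and on ambiguity. A toy sketch of that matching rule (hypothetical helper, not part of vtlengine):

def resolve(name: str, components: list) -> str:
    """Match a bare or '#'-qualified name against 'ds#comp' component names."""
    matches = [
        c
        for c in components
        if ("#" in c and c.split("#")[1] == name)
        or ("#" in name and name.split("#")[1] == c)
    ]
    if not matches:
        raise KeyError(f"component {name!r} not found")
    if len(matches) > 1:
        raise KeyError(f"component {name!r} is ambiguous")
    return matches[0]

print(resolve("obs_value", ["DS_1#obs_value", "DS_1#id_time"]))  # DS_1#obs_value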
@@ -685,14 +812,14 @@ class InterpreterAnalyzer(ASTTemplate):
             if len(duplicates) > 0:
                 raise SemanticError("1-3-9", duplicates=duplicates)
             for element in elements:
-                if type(element)
+                if type(element) is not type(elements[0]):
                     raise Exception("All elements in a set must be of the same type")
             if len(elements) == 0:
                 raise Exception("A set must contain at least one element")
             if len(elements) != len(set(elements)):
                 raise Exception("A set must not contain duplicates")
             return ScalarSet(data_type=BASIC_TYPES[type(elements[0])], values=elements)
-        elif node.kind ==
+        elif node.kind == "ValueDomain":
             if self.value_domains is None:
                 raise SemanticError("2-3-10", comp_type="Value Domains")
             if node.name not in self.value_domains:
@@ -702,7 +829,7 @@ class InterpreterAnalyzer(ASTTemplate):
         else:
             raise SemanticError("1-3-26", name=node.name)
 
-    def visit_RegularAggregation(self, node: AST.RegularAggregation) -> None:
+    def visit_RegularAggregation(self, node: AST.RegularAggregation) -> None:  # noqa: C901
         operands = []
         dataset = self.visit(node.dataset)
         if isinstance(dataset, Scalar):
@@ -720,11 +847,19 @@ class InterpreterAnalyzer(ASTTemplate):
             raise SemanticError("1-3-35", op=node.op)
         if node.op == AGGREGATE:
             # Extracting the role encoded inside the children assignments
-            role_info = {
+            role_info = {
+                child.left.value: child.left.role
+                for child in node.children
+                if hasattr(child, "left")
+            }
             dataset = copy(operands[0])
-
-
-
+            if self.regular_aggregation_dataset is not None:
+                dataset.name = self.regular_aggregation_dataset.name
+            dataset.components = {
+                comp_name: comp
+                for comp_name, comp in dataset.components.items()
+                if comp.role != Role.MEASURE
+            }
             if dataset.data is not None:
                 dataset.data = dataset.data[dataset.get_identifiers_names()]
             aux_operands = []
@@ -733,8 +868,7 @@ class InterpreterAnalyzer(ASTTemplate):
                 data = operand.data[measure.name] if operand.data is not None else None
                 # Getting role from encoded information
                 # (handling also UDO params as it is present in the value of the mapping)
-                if
-                        operand.name in self.udo_params[-1].values()):
+                if self.udo_params is not None and operand.name in self.udo_params[-1].values():
                     role = None
                     for k, v in self.udo_params[-1].items():
                         if isinstance(v, str) and v == operand.name:
@@ -742,41 +876,65 @@ class InterpreterAnalyzer(ASTTemplate):
                             role = role_info[role_key]
                 else:
                     role = role_info[operand.name]
-                aux_operands.append(
-
-
-
-
+                aux_operands.append(
+                    DataComponent(
+                        name=operand.name,
+                        data=data,
+                        data_type=measure.data_type,
+                        role=role if role is not None else measure.role,
+                        nullable=measure.nullable,
+                    )
+                )
            operands = aux_operands
            self.regular_aggregation_dataset = None
         if node.op == FILTER:
-            if not isinstance(operands[0], DataComponent):
+            if not isinstance(operands[0], DataComponent) and hasattr(child, "left"):
                 measure = child.left.value
-                operands[0] = DataComponent(
-
-
-
-
+                operands[0] = DataComponent(
+                    name=measure,
+                    data=operands[0].data[measure],
+                    data_type=operands[0].components[measure].data_type,
+                    role=operands[0].components[measure].role,
+                    nullable=operands[0].components[measure].nullable,
+                )
             return REGULAR_AGGREGATION_MAPPING[node.op].analyze(operands[0], dataset)
         if self.is_from_join:
             if node.op in [DROP, KEEP]:
-                operands = [
-
-
-
-
-
+                operands = [
+                    (
+                        operand.get_measures_names()
+                        if isinstance(operand, Dataset)
+                        else (
+                            operand.name
+                            if isinstance(operand, DataComponent)
+                            and operand.role is not Role.IDENTIFIER
+                            else operand
+                        )
+                    )
+                    for operand in operands
+                ]
+                operands = list(
+                    set(
+                        [
+                            item
+                            for sublist in operands
+                            for item in (sublist if isinstance(sublist, list) else [sublist])
+                        ]
+                    )
+                )
             result = REGULAR_AGGREGATION_MAPPING[node.op].analyze(operands, dataset)
             if node.isLast:
                 if result.data is not None:
                     result.data.rename(
-                        columns={col: col[col.find(
-                        inplace=True
-
-
-
+                        columns={col: col[col.find("#") + 1 :] for col in result.data.columns},
+                        inplace=True,
+                    )
+                result.components = {
+                    comp_name[comp_name.find("#") + 1 :]: comp
+                    for comp_name, comp in result.components.items()
+                }
                 for comp in result.components.values():
-                    comp.name = comp.name[comp.name.find(
+                    comp.name = comp.name[comp.name.find("#") + 1 :]
                 if result.data is not None:
                     result.data.reset_index(drop=True, inplace=True)
                 self.is_from_join = False
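The drop/keep branch in the hunk above first maps each operand to either a list of measure names or a single name, then flattens and de-duplicates the mixed result. The flatten-then-set step in isolation (toy input):

operands = ["Me_1", ["Me_2", "Me_3"], "Me_2"]
flat = list(
    set(
        item
        for sublist in operands
        for item in (sublist if isinstance(sublist, list) else [sublist])
    )
)
print(sorted(flat))  # ['Me_1', 'Me_2', 'Me_3']
# set() removes the duplicate 'Me_2' but does not preserve order, which is
# why the result is sorted here for a deterministic display.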
@@ -793,8 +951,9 @@ class InterpreterAnalyzer(ASTTemplate):
            thenValue = self.visit(node.thenOp)
            elseValue = self.visit(node.elseOp)
            if not isinstance(thenValue, Scalar) or not isinstance(elseValue, Scalar):
-                raise SemanticError(
-
+                raise SemanticError(
+                    "1-1-9-3", op="If_op", then_name=thenValue.name, else_name=elseValue.name
+                )
            if condition.value:
                return self.visit(node.thenOp)
            else:
@@ -810,20 +969,21 @@ class InterpreterAnalyzer(ASTTemplate):
         self.else_condition_dataset = []
         self.generate_then_else_datasets(copy(condition))
 
-        self.if_stack.append(THEN_ELSE[
+        self.if_stack.append(THEN_ELSE["then"])
         self.is_from_if = True
-        self.nested_if =
+        self.nested_if = "T" if isinstance(node.thenOp, AST.If) else False
         thenOp = self.visit(node.thenOp)
         if isinstance(thenOp, Scalar) or not isinstance(node.thenOp, AST.BinOp):
             self.then_condition_dataset.pop()
             self.if_stack.pop()
 
-        self.if_stack.append(THEN_ELSE[
+        self.if_stack.append(THEN_ELSE["else"])
         self.is_from_if = True
-        self.nested_if =
+        self.nested_if = "E" if isinstance(node.elseOp, AST.If) else False
         elseOp = self.visit(node.elseOp)
         if isinstance(elseOp, Scalar) or (
-
+            not isinstance(node.elseOp, AST.BinOp) and not isinstance(node.elseOp, AST.If)
+        ):
             if len(self.else_condition_dataset) > 0:
                 self.else_condition_dataset.pop()
             if len(self.if_stack) > 0:
@@ -834,27 +994,32 @@ class InterpreterAnalyzer(ASTTemplate):
     def visit_RenameNode(self, node: AST.RenameNode) -> Any:
         if self.udo_params is not None:
             if "#" in node.old_name:
-                if node.old_name.split(
-                    comp_name = self.udo_params[-1][node.old_name.split(
+                if node.old_name.split("#")[1] in self.udo_params[-1]:
+                    comp_name = self.udo_params[-1][node.old_name.split("#")[1]]
                     node.old_name = f"{node.old_name.split('#')[0]}#{comp_name}"
             else:
                 if node.old_name in self.udo_params[-1]:
                     node.old_name = self.udo_params[-1][node.old_name]
 
-        if
-
+        if (
+            self.is_from_join
+            and self.regular_aggregation_dataset is not None
+            and node.old_name not in self.regular_aggregation_dataset.components
+        ):
+            node.old_name = node.old_name.split("#")[1]
 
         return node
 
     def visit_Constant(self, node: AST.Constant) -> Any:
-        return Scalar(
-
+        return Scalar(
+            name=str(node.value), value=node.value, data_type=BASIC_TYPES[type(node.value)]
+        )
 
     def visit_JoinOp(self, node: AST.JoinOp) -> None:
         clause_elements = []
         for clause in node.clauses:
             clause_elements.append(self.visit(clause))
-            if hasattr(clause,
+            if hasattr(clause, "op") and clause.op == AS:
                 # TODO: We need to delete somewhere the join datasets with alias that are added here
                 self.datasets[clause_elements[-1].name] = clause_elements[-1]
 
@@ -865,7 +1030,7 @@ class InterpreterAnalyzer(ASTTemplate):
     def visit_ParamConstant(self, node: AST.ParamConstant) -> str:
         return node.value
 
-    def visit_ParamOp(self, node: AST.ParamOp) -> None:
+    def visit_ParamOp(self, node: AST.ParamOp) -> None:  # noqa: C901
         if node.op == ROUND:
             op_element = self.visit(node.children[0])
             if len(node.params) != 0:
@@ -899,26 +1064,34 @@ class InterpreterAnalyzer(ASTTemplate):
             else:
                 raise NotImplementedError
         elif node.op == HAVING:
-
-
-
-
-
-
-
-
-
-
-
-
-
+            if self.aggregation_dataset is not None and self.aggregation_grouping is not None:
+                for id_name in self.aggregation_grouping:
+                    if id_name not in self.aggregation_dataset.components:
+                        raise SemanticError("1-1-2-4", op=node.op, id_name=id_name)
+                if len(self.aggregation_dataset.get_measures()) != 1:
+                    raise ValueError("Only one measure is allowed")
+                # Deepcopy is necessary for components to avoid changing the original dataset
+                self.aggregation_dataset.components = {
+                    comp_name: deepcopy(comp)
+                    for comp_name, comp in self.aggregation_dataset.components.items()
+                    if comp_name in self.aggregation_grouping or comp.role == Role.MEASURE
+                }
+
+                self.aggregation_dataset.data = (
+                    self.aggregation_dataset.data[
+                        self.aggregation_dataset.get_identifiers_names()
+                        + self.aggregation_dataset.get_measures_names()
+                    ]
+                    if (self.aggregation_dataset.data is not None)
+                    else None
+                )
             result = self.visit(node.params)
             measure = result.get_measures()[0]
             if measure.data_type != Boolean:
                 raise SemanticError("1-1-2-3", type=SCALAR_TYPES_CLASS_REVERSE[Boolean])
             return None
         elif node.op == FILL_TIME_SERIES:
-            mode = self.visit(node.params[0]) if len(node.params) == 1 else
+            mode = self.visit(node.params[0]) if len(node.params) == 1 else "all"
             return Fill_time_series.analyze(self.visit(node.children[0]), mode)
         elif node.op == CAST:
             operand = self.visit(node.children[0])
@@ -932,7 +1105,7 @@ class InterpreterAnalyzer(ASTTemplate):
             if self.dprs is None:
                 raise SemanticError("1-3-19", node_type="Datapoint Rulesets", node_value="")
             # Checking if ruleset exists
-            dpr_name = node.children[1]
+            dpr_name: Any = node.children[1]
             if dpr_name not in self.dprs:
                 raise SemanticError("1-3-19", node_type="Datapoint Ruleset", node_value=dpr_name)
             dpr_info = self.dprs[dpr_name]
@@ -944,37 +1117,44 @@ class InterpreterAnalyzer(ASTTemplate):
             # Checking if list of components supplied is valid
             if len(node.children) > 2:
                 for comp_name in node.children[2:]:
-                    if comp_name not in dataset_element.components:
-                        raise SemanticError(
-
-
+                    if comp_name.__str__() not in dataset_element.components:
+                        raise SemanticError(
+                            "1-1-1-10", comp_name=comp_name, dataset_name=dataset_element.name
+                        )
+                if dpr_info is not None and dpr_info["signature_type"] == "variable":
                     for i, comp_name in enumerate(node.children[2:]):
-                        if comp_name != dpr_info[
-                        raise SemanticError(
-
-
-
-
+                        if comp_name != dpr_info["params"][i]:
+                            raise SemanticError(
+                                "1-1-10-3",
+                                op=node.op,
+                                expected=dpr_info["params"][i],
+                                found=comp_name,
+                            )
+
+            output: Any = node.params[0]  # invalid, all_measures, all
+            if dpr_info is None:
+                dpr_info = {}
 
             rule_output_values = {}
             self.ruleset_dataset = dataset_element
-            self.ruleset_signature = dpr_info[
+            self.ruleset_signature = dpr_info["signature"]
             self.ruleset_mode = output
             # Gather rule data, adding the ruleset dataset to the interpreter
-
-
-
-
-
-
+            if dpr_info is not None:
+                for rule in dpr_info["rules"]:
+                    rule_output_values[rule.name] = {
+                        "errorcode": rule.erCode,
+                        "errorlevel": rule.erLevel,
+                        "output": self.visit(rule),
+                    }
             self.ruleset_mode = None
             self.ruleset_signature = None
             self.ruleset_dataset = None
 
             # Datapoint Ruleset final evaluation
-            return Check_Datapoint.analyze(
-
-
+            return Check_Datapoint.analyze(
+                dataset_element=dataset_element, rule_info=rule_output_values, output=output
+            )
         elif node.op in (CHECK_HIERARCHY, HIERARCHY):
             if len(node.children) == 3:
                 dataset, component, hr_name = (self.visit(x) for x in node.children)
@@ -993,134 +1173,151 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
993
1173
|
|
|
994
1174
|
if self.hrs is None:
|
|
995
1175
|
raise SemanticError("1-3-19", node_type="Hierarchical Rulesets", node_value="")
|
|
996
|
-
if hr_name not in self.hrs:
|
|
997
|
-
raise SemanticError("1-3-19", node_type="Hierarchical Ruleset", node_value=hr_name)
|
|
998
|
-
|
|
999
|
-
if not isinstance(dataset, Dataset):
|
|
1000
|
-
raise SemanticError("1-1-1-20", op=node.op)
|
|
1001
|
-
|
|
1002
|
-
hr_info = self.hrs[hr_name]
|
|
1003
|
-
|
|
1004
|
-
if len(cond_components) != len(hr_info['condition']):
|
|
1005
|
-
raise SemanticError("1-1-10-2", op=node.op)
|
|
1006
|
-
|
|
1007
|
-
if hr_info['node'].signature_type == 'variable' and hr_info['signature'] != component:
|
|
1008
|
-
raise SemanticError("1-1-10-3", op=node.op,
|
|
1009
|
-
found=component,
|
|
1010
|
-
expected=hr_info['signature'])
|
|
1011
|
-
elif hr_info['node'].signature_type == 'valuedomain' and component is None:
|
|
1012
|
-
raise SemanticError("1-1-10-4", op=node.op)
|
|
1013
|
-
|
|
1014
|
-
cond_info = {}
|
|
1015
|
-
for i, cond_comp in enumerate(hr_info['condition']):
|
|
1016
|
-
if hr_info['node'].signature_type == 'variable' and cond_components[i] != cond_comp:
|
|
1017
|
-
raise SemanticError("1-1-10-6", op=node.op,
|
|
1018
|
-
expected=cond_comp, found=cond_components[i])
|
|
1019
|
-
cond_info[cond_comp] = cond_components[i]
|
|
1020
|
-
|
|
1021
|
-
if node.op == HIERARCHY:
|
|
1022
|
-
aux = []
|
|
1023
|
-
for rule in hr_info['rules']:
|
|
1024
|
-
if rule.rule.op == EQ:
|
|
1025
|
-
aux.append(rule)
|
|
1026
|
-
elif rule.rule.op == WHEN:
|
|
1027
|
-
if rule.rule.right.op == EQ:
|
|
1028
|
-
aux.append(rule)
|
|
1029
|
-
# Filter only the rules with HRBinOP as =,
|
|
1030
|
-
# as they are the ones that will be computed
|
|
1031
|
-
if len(aux) == 0:
|
|
1032
|
-
raise SemanticError("1-1-10-5")
|
|
1033
|
-
hr_info['rules'] = aux
|
|
1034
|
-
|
|
1035
|
-
hierarchy_ast = AST.HRuleset(name=hr_name,
|
|
1036
|
-
signature_type=hr_info['node'].signature_type,
|
|
1037
|
-
element=hr_info['node'].element, rules=aux)
|
|
1038
|
-
HRDAGAnalyzer().visit(hierarchy_ast)
|
|
1039
|
-
|
|
1040
|
-
Check_Hierarchy.validate_hr_dataset(dataset, component)
|
|
1041
|
-
|
|
1042
|
-
# Gather rule data, adding the necessary elements to the interpreter
|
|
1043
|
-
# for simplicity
|
|
1044
|
-
self.ruleset_dataset = dataset
|
|
1045
|
-
self.ruleset_signature = {**{"RULE_COMPONENT": component}, **cond_info}
|
|
1046
|
-
self.ruleset_mode = mode
|
|
1047
|
-
self.hr_input = input_
|
|
1048
|
-
rule_output_values = {}
|
|
1049
|
-
if node.op == HIERARCHY:
|
|
1050
|
-
self.is_from_hr_agg = True
|
|
1051
|
-
self.hr_agg_rules_computed = {}
|
|
1052
|
-
for rule in hr_info['rules']:
|
|
1053
|
-
self.visit(rule)
|
|
1054
|
-
self.is_from_hr_agg = False
|
|
1055
|
-
else:
|
|
1056
|
-
self.is_from_hr_val = True
|
|
1057
|
-
for rule in hr_info['rules']:
|
|
1058
|
-
rule_output_values[rule.name] = {
|
|
1059
|
-
"errorcode": rule.erCode,
|
|
1060
|
-
"errorlevel": rule.erLevel,
|
|
1061
|
-
"output": self.visit(rule)
|
|
1062
|
-
}
|
|
1063
|
-
self.is_from_hr_val = False
|
|
1064
|
-
self.ruleset_signature = None
|
|
1065
|
-
self.ruleset_dataset = None
|
|
1066
|
-
self.ruleset_mode = None
|
|
1067
|
-
self.hr_input = None
|
|
1068
|
-
|
|
1069
|
-
# Final evaluation
|
|
1070
|
-
if node.op == CHECK_HIERARCHY:
|
|
1071
|
-
result = Check_Hierarchy.analyze(dataset_element=dataset,
|
|
1072
|
-
rule_info=rule_output_values,
|
|
1073
|
-
output=output)
|
|
1074
|
-
del rule_output_values
|
|
1075
1176
|
else:
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1177
|
+
if hr_name not in self.hrs:
|
|
1178
|
+
raise SemanticError(
|
|
1179
|
+
"1-3-19", node_type="Hierarchical Ruleset", node_value=hr_name
|
|
1180
|
+
)
|
|
1181
|
+
|
|
1182
|
+
+        if not isinstance(dataset, Dataset):
+            raise SemanticError("1-1-1-20", op=node.op)
+
+        hr_info = self.hrs[hr_name]
+        if hr_info is not None:
+            if len(cond_components) != len(hr_info["condition"]):
+                raise SemanticError("1-1-10-2", op=node.op)
+
+            if (
+                hr_info["node"].signature_type == "variable"
+                and hr_info["signature"] != component
+            ):
+                raise SemanticError(
+                    "1-1-10-3", op=node.op, found=component, expected=hr_info["signature"]
+                )
+            elif hr_info["node"].signature_type == "valuedomain" and component is None:
+                raise SemanticError("1-1-10-4", op=node.op)
+
+            cond_info = {}
+            for i, cond_comp in enumerate(hr_info["condition"]):
+                if (
+                    hr_info["node"].signature_type == "variable"
+                    and cond_components[i] != cond_comp
+                ):
+                    raise SemanticError(
+                        "1-1-10-6", op=node.op, expected=cond_comp, found=cond_components[i]
+                    )
+                cond_info[cond_comp] = cond_components[i]
+
+            if node.op == HIERARCHY:
+                aux = []
+                for rule in hr_info["rules"]:
+                    if rule.rule.op == EQ:
+                        aux.append(rule)
+                    elif rule.rule.op == WHEN:
+                        if rule.rule.right.op == EQ:
+                            aux.append(rule)
+                # Filter only the rules with HRBinOP as =,
+                # as they are the ones that will be computed
+                if len(aux) == 0:
+                    raise SemanticError("1-1-10-5")
+                hr_info["rules"] = aux
+
+                hierarchy_ast = AST.HRuleset(
+                    name=hr_name,
+                    signature_type=hr_info["node"].signature_type,
+                    element=hr_info["node"].element,
+                    rules=aux,
+                )
+                HRDAGAnalyzer().visit(hierarchy_ast)
+
+        Check_Hierarchy.validate_hr_dataset(dataset, component)
+
+        # Gather rule data, adding the necessary elements to the interpreter
+        # for simplicity
+        self.ruleset_dataset = dataset
+        self.ruleset_signature = {**{"RULE_COMPONENT": component}, **cond_info}
+        self.ruleset_mode = mode
+        self.hr_input = input_
+        rule_output_values = {}
+        if node.op == HIERARCHY:
+            self.is_from_hr_agg = True
+            self.hr_agg_rules_computed = {}
+            for rule in hr_info["rules"]:
+                self.visit(rule)
+            self.is_from_hr_agg = False
+        else:
+            self.is_from_hr_val = True
+            for rule in hr_info["rules"]:
+                rule_output_values[rule.name] = {
+                    "errorcode": rule.erCode,
+                    "errorlevel": rule.erLevel,
+                    "output": self.visit(rule),
+                }
+            self.is_from_hr_val = False
+        self.ruleset_signature = None
+        self.ruleset_dataset = None
+        self.ruleset_mode = None
+        self.hr_input = None
+
+        # Final evaluation
+        if node.op == CHECK_HIERARCHY:
+            result = Check_Hierarchy.analyze(
+                dataset_element=dataset, rule_info=rule_output_values, output=output
+            )
+            del rule_output_values
+        else:
+            result = Hierarchy.analyze(dataset, self.hr_agg_rules_computed, output)
+            self.hr_agg_rules_computed = None
+        return result
 
-        raise SemanticError("1-3-5", op_type=
+        raise SemanticError("1-3-5", op_type="ParamOp", node_op=node.op)
 
     def visit_DPRule(self, node: AST.DPRule) -> None:
         self.is_from_rule = True
-        if self.ruleset_dataset
-        self.
-
-
+        if self.ruleset_dataset is not None:
+            if self.ruleset_dataset.data is None:
+                self.rule_data = None
+            else:
+                self.rule_data = self.ruleset_dataset.data.copy()
         validation_data = self.visit(node.rule)
         if isinstance(validation_data, DataComponent):
-            if self.rule_data is not None:
+            if self.rule_data is not None and self.ruleset_dataset is not None:
                 aux = self.rule_data.loc[:, self.ruleset_dataset.get_components_names()]
-                aux[
+                aux["bool_var"] = validation_data.data
                 validation_data = aux
             else:
                 validation_data = None
         if self.ruleset_mode == "invalid" and validation_data is not None:
-            validation_data = validation_data[validation_data[
+            validation_data = validation_data[validation_data["bool_var"] == False]
         self.rule_data = None
         self.is_from_rule = False
         return validation_data
 
     def visit_HRule(self, node: AST.HRule) -> None:
         self.is_from_rule = True
-        if self.ruleset_dataset
-        self.rule_data =
-
-
+        if self.ruleset_dataset is not None:
+            self.rule_data = (
+                None if self.ruleset_dataset.data is None else self.ruleset_dataset.data.copy()
+            )
         rule_result = self.visit(node.rule)
         if rule_result is None:
             self.is_from_rule = False
             return None
         if self.is_from_hr_agg:
             measure_name = rule_result.get_measures_names()[0]
-            if
-
-
+            if (
+                self.hr_agg_rules_computed is not None
+                and rule_result.data is not None
+                and len(rule_result.data[measure_name]) > 0
+            ):
+                self.hr_agg_rules_computed[rule_result.name] = rule_result.data
         else:
             rule_result = rule_result.data
         self.rule_data = None
         self.is_from_rule = False
         return rule_result
 
-    def visit_HRBinOp(self, node: AST.HRBinOp) ->
+    def visit_HRBinOp(self, node: AST.HRBinOp) -> Any:
         if node.op == WHEN:
             filter_comp = self.visit(node.left)
             if self.rule_data is None:
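The new validation block above checks a hierarchical ruleset's condition signature before any rule is evaluated. A minimal standalone sketch of the same arity and name checks; the hr_info and cond_components values here are hypothetical stand-ins, not engine fixtures:

# Sketch of the condition-signature validation introduced above.
hr_info = {"condition": ["ACCOUNTING_ENTRY"], "signature_type": "variable"}
cond_components = ["ACCOUNTING_ENTRY"]

if len(cond_components) != len(hr_info["condition"]):
    raise ValueError("1-1-10-2: wrong number of condition components")
for expected, found in zip(hr_info["condition"], cond_components):
    if hr_info["signature_type"] == "variable" and expected != found:
        raise ValueError(f"1-1-10-6: expected {expected}, found {found}")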
@@ -1128,7 +1325,7 @@ class InterpreterAnalyzer(ASTTemplate):
             filtering_indexes = list(filter_comp.data[filter_comp.data == True].index)
             # If no filtering indexes, then all datapoints are valid on DPR and HR
             if len(filtering_indexes) == 0 and not (self.is_from_hr_agg or self.is_from_hr_val):
-                self.rule_data[
+                self.rule_data["bool_var"] = True
                 return self.rule_data
             non_filtering_indexes = list(set(filter_comp.data.index) - set(filtering_indexes))
 
@@ -1138,14 +1335,15 @@ class InterpreterAnalyzer(ASTTemplate):
             if self.is_from_hr_agg or self.is_from_hr_val:
                 # We only need to filter rule_data on DPR
                 return result_validation
-            self.rule_data[
-            original_data = original_data.merge(
-
-
+            self.rule_data["bool_var"] = result_validation.data
+            original_data = original_data.merge(
+                self.rule_data, how="left", on=original_data.columns.tolist()
+            )
+            original_data.loc[non_filtering_indexes, "bool_var"] = True
             return original_data
         elif node.op in HR_COMP_MAPPING:
             self.is_from_assignment = True
-            if self.ruleset_mode in (
+            if self.ruleset_mode in ("partial_null", "partial_zero"):
                 self.hr_partial_is_valid = []
             left_operand = self.visit(node.left)
             self.is_from_assignment = False
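The WHEN branch above marks every datapoint that does not satisfy the antecedent as valid, then merges the rule verdicts back onto the original data. A minimal pandas sketch of that semantics, with illustrative column names rather than the engine's own:

import pandas as pd

# Rows failing the WHEN antecedent are valid regardless of the consequent.
rule_data = pd.DataFrame({"Id": [1, 2, 3], "Me": [10, -5, 7]})
antecedent = rule_data["Me"] > 0   # WHEN condition
consequent = rule_data["Me"] < 8   # rule body, relevant where antecedent holds

rule_data["bool_var"] = consequent
rule_data.loc[~antecedent, "bool_var"] = True
print(rule_data)  # only row 0 (antecedent True, consequent False) is invalid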
@@ -1153,7 +1351,7 @@ class InterpreterAnalyzer(ASTTemplate):
             if isinstance(right_operand, Dataset):
                 right_operand = get_measure_from_dataset(right_operand, node.right.value)
 
-            if self.ruleset_mode in (
+            if self.ruleset_mode in ("partial_null", "partial_zero"):
                 # Check all values were present in the dataset
                 if self.hr_partial_is_valid and not any(self.hr_partial_is_valid):
                     right_operand.data = right_operand.data.map(lambda x: "REMOVE_VALUE")
@@ -1162,8 +1360,9 @@ class InterpreterAnalyzer(ASTTemplate):
             if self.is_from_hr_agg:
                 return HAAssignment.analyze(left_operand, right_operand, self.ruleset_mode)
             else:
-                result = HR_COMP_MAPPING[node.op].analyze(
-
+                result = HR_COMP_MAPPING[node.op].analyze(
+                    left_operand, right_operand, self.ruleset_mode
+                )
                 left_measure = left_operand.get_measures()[0]
                 if left_operand.data is None:
                     result.data = None
@@ -1175,14 +1374,23 @@ class InterpreterAnalyzer(ASTTemplate):
         else:
             left_operand = self.visit(node.left)
             right_operand = self.visit(node.right)
-            if
-
-
+            if (
+                isinstance(left_operand, Dataset)
+                and isinstance(right_operand, Dataset)
+                and self.ruleset_mode in ("partial_null", "partial_zero")
+                and not self.only_semantic
+            ):
                 measure_name = left_operand.get_measures_names()[0]
+                if left_operand.data is None:
+                    left_operand.data = pd.DataFrame({measure_name: []})
+                if right_operand.data is None:
+                    right_operand.data = pd.DataFrame({measure_name: []})
                 left_null_indexes = set(
-                    list(left_operand.data[left_operand.data[measure_name].isnull()].index)
+                    list(left_operand.data[left_operand.data[measure_name].isnull()].index)
+                )
                 right_null_indexes = set(
-                    list(right_operand.data[right_operand.data[measure_name].isnull()].index)
+                    list(right_operand.data[right_operand.data[measure_name].isnull()].index)
+                )
                 # If no indexes are in common, then one datapoint is not null
                 invalid_indexes = list(left_null_indexes.intersection(right_null_indexes))
                 if len(invalid_indexes) > 0:
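In the partial_* modes above, a datapoint is flagged only when it is null on both sides of the rule. A short pandas sketch of that intersection test, with an illustrative measure name:

import pandas as pd

measure_name = "OBS_VALUE"
left = pd.DataFrame({measure_name: [1.0, None, None]})
right = pd.DataFrame({measure_name: [None, 2.0, None]})

left_null = set(left[left[measure_name].isnull()].index)
right_null = set(right[right[measure_name].isnull()].index)
invalid_indexes = sorted(left_null & right_null)
print(invalid_indexes)  # [2] -- only index 2 is null on both sides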
@@ -1209,11 +1417,13 @@ class InterpreterAnalyzer(ASTTemplate):
         if not isinstance(imbalance_element, Dataset):
             raise ValueError(f"Expected dataset, got {type(validation_element).__name__}")
 
-        return Check.analyze(
-
-
-
-
+        return Check.analyze(
+            validation_element=validation_element,
+            imbalance_element=imbalance_element,
+            error_code=node.error_code,
+            error_level=node.error_level,
+            invalid=node.invalid,
+        )
 
     def visit_EvalOp(self, node: AST.EvalOp) -> Dataset:
         """
@@ -1234,22 +1444,28 @@ class InterpreterAnalyzer(ASTTemplate):
             raise SemanticError("2-3-10", comp_type="External Routines")
 
         if node.name not in self.external_routines:
-            raise SemanticError("1-3-5", op_type=
+            raise SemanticError("1-3-5", op_type="External Routine", node_op=node.name)
         external_routine = self.external_routines[node.name]
         operands = {}
         for operand in node.operands:
-            element =
+            element = self.visit(operand)
            if not isinstance(element, Dataset):
                 raise ValueError(f"Expected dataset, got {type(element).__name__} as Eval Operand")
             operands[element.name.split(".")[1] if "." in element.name else element.name] = element
         output_to_check = node.output
         return Eval.analyze(operands, external_routine, output_to_check)
 
-    def generate_then_else_datasets(self, condition):
+    def generate_then_else_datasets(self, condition: Union[Dataset, DataComponent]) -> None:
         components = {}
+        if self.then_condition_dataset is None:
+            self.then_condition_dataset = []
+        if self.else_condition_dataset is None:
+            self.else_condition_dataset = []
         if isinstance(condition, Dataset):
-            if
-
+            if (
+                len(condition.get_measures()) != 1
+                or condition.get_measures()[0].data_type != BASIC_TYPES[bool]
+            ):
                 raise ValueError("Only one boolean measure is allowed on condition dataset")
             name = condition.get_measures_names()[0]
             if condition.data is None or condition.data.empty:
@@ -1268,9 +1484,12 @@ class InterpreterAnalyzer(ASTTemplate):
             data = condition.data
 
         if data is not None:
-            if self.nested_if:
-                merge_df =
-
+            if self.nested_if and self.if_stack is not None:
+                merge_df = (
+                    self.then_condition_dataset[-1]
+                    if self.if_stack[-1] == THEN_ELSE["then"]
+                    else self.else_condition_dataset[-1]
+                )
                 indexes = merge_df.data[merge_df.data.columns[-1]]
             else:
                 indexes = data.index
@@ -1278,13 +1497,21 @@ class InterpreterAnalyzer(ASTTemplate):
 
         if isinstance(condition, Dataset):
             filtered_data = data.iloc[indexes]
-            then_data =
-
+            then_data: Any = (
+                condition.data[condition.data[name] == True]
+                if (condition.data is not None)
+                else []
+            )
+            then_indexes: Any = list(filtered_data[filtered_data == True].index)
             if len(then_data) > len(then_indexes):
                 then_data = then_data.iloc[then_indexes]
             then_data[name] = then_indexes
-            else_data =
-
+            else_data: Any = (
+                condition.data[condition.data[name] != True]
+                if (condition.data is not None)
+                else []
+            )
+            else_indexes: Any = list(set(indexes) - set(then_indexes))
             if len(else_data) > len(else_indexes):
                 else_data = else_data.iloc[else_indexes]
             else_data[name] = else_indexes
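The hunk above partitions the condition data: rows where the boolean measure is True feed the THEN branch, everything else (False or null) feeds the ELSE branch. A small sketch of the split, with "bool_var" as an illustrative measure name:

import pandas as pd

name = "bool_var"
condition = pd.DataFrame({name: [True, False, True, None]})

then_data = condition[condition[name] == True]  # rows for the THEN branch
else_data = condition[condition[name] != True]  # rows for the ELSE branch (False or null)
print(list(then_data.index), list(else_data.index))  # [0, 2] [1, 3]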
@@ -1298,28 +1525,44 @@ class InterpreterAnalyzer(ASTTemplate):
             then_data = pd.DataFrame({name: []})
             else_data = pd.DataFrame({name: []})
         components.update(
-            {
-
+            {
+                name: Component(
+                    name=name, data_type=BASIC_TYPES[int], role=Role.MEASURE, nullable=True
+                )
+            }
+        )
         then_dataset = Dataset(name=name, components=components, data=then_data)
         else_dataset = Dataset(name=name, components=components, data=else_data)
         self.then_condition_dataset.append(then_dataset)
         self.else_condition_dataset.append(else_dataset)
 
-    def merge_then_else_datasets(self, left_operand:
-
-
-        self.else_condition_dataset
+    def merge_then_else_datasets(self, left_operand: Any, right_operand: Any) -> Any:
+        if (
+            self.then_condition_dataset is None
+            or self.else_condition_dataset is None
+            or self.if_stack is None
+        ):
+            return left_operand, right_operand
+        merge_dataset = (
+            self.then_condition_dataset.pop()
+            if self.if_stack.pop() == THEN_ELSE["then"]
+            else (self.else_condition_dataset.pop())
+        )
         merge_index = merge_dataset.data[merge_dataset.get_measures_names()[0]].to_list()
         ids = merge_dataset.get_identifiers_names()
         if isinstance(left_operand, Dataset | DataComponent):
             if left_operand.data is None:
                 return left_operand, right_operand
             if isinstance(left_operand, Dataset):
-                dataset_index = left_operand.data.index[
-
+                dataset_index = left_operand.data.index[
+                    left_operand.data[ids]
+                    .apply(tuple, 1)
+                    .isin(merge_dataset.data[ids].apply(tuple, 1))
+                ]
                 left = left_operand.data[left_operand.get_measures_names()[0]]
                 left_operand.data[left_operand.get_measures_names()[0]] = left.reindex(
-                    dataset_index, fill_value=None
+                    dataset_index, fill_value=None
+                )
             else:
                 left = left_operand.data
                 left_operand.data = left.reindex(merge_index, fill_value=None)
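The merge above aligns rows across datasets by turning the identifier columns into tuples and testing membership. A minimal sketch of that idiom, with illustrative column names:

import pandas as pd

ids = ["Id_1", "Id_2"]
left = pd.DataFrame({"Id_1": [1, 1, 2], "Id_2": ["A", "B", "A"], "Me": [10, 20, 30]})
merge = pd.DataFrame({"Id_1": [1, 2], "Id_2": ["B", "A"]})

# Keep the indexes of left whose identifier tuples also appear in merge.
mask = left[ids].apply(tuple, 1).isin(merge[ids].apply(tuple, 1))
dataset_index = left.index[mask]
print(list(dataset_index))  # [1, 2]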
@@ -1328,17 +1571,20 @@ class InterpreterAnalyzer(ASTTemplate):
             return left_operand, right_operand
         if isinstance(right_operand, Dataset):
             dataset_index = right_operand.data.index[
-                right_operand.data[ids]
-
+                right_operand.data[ids]
+                .apply(tuple, 1)
+                .isin(merge_dataset.data[ids].apply(tuple, 1))
+            ]
             right = right_operand.data[right_operand.get_measures_names()[0]]
             right_operand.data[right_operand.get_measures_names()[0]] = right.reindex(
-                dataset_index, fill_value=None
+                dataset_index, fill_value=None
+            )
         else:
             right = right_operand.data
             right_operand.data = right.reindex(merge_index, fill_value=None)
         return left_operand, right_operand
 
-    def visit_Identifier(self, node: AST.Identifier) -> AST.AST:
+    def visit_Identifier(self, node: AST.Identifier) -> Union[AST.AST, Dataset, str]:
         """
         Identifier: (value)
 
@@ -1356,7 +1602,7 @@ class InterpreterAnalyzer(ASTTemplate):
             return self.datasets[node.value]
         return node.value
 
-    def visit_DefIdentifier(self, node: AST.DefIdentifier) ->
+    def visit_DefIdentifier(self, node: AST.DefIdentifier) -> Any:
         """
         DefIdentifier: (value, kind)
 
@@ -1366,51 +1612,62 @@ class InterpreterAnalyzer(ASTTemplate):
         """
         partial_is_valid = True
         # Only for Hierarchical Rulesets
-        if not (self.is_from_rule and node.kind ==
+        if not (self.is_from_rule and node.kind == "CodeItemID"):
            return node.value
 
         # Getting Dataset elements
-        result_components = {
-
-
-
+        result_components = {
+            c_name: copy(comp)
+            for c_name, comp in self.ruleset_dataset.components.items()  # type: ignore[union-attr]
+        }
+        if self.ruleset_signature is not None:
+            hr_component = self.ruleset_signature["RULE_COMPONENT"]
         name = node.value
 
         if self.rule_data is None:
             return Dataset(name=name, components=result_components, data=None)
 
         condition = None
-        if hasattr(node,
-            condition: DataComponent = self.visit(node._right_condition)
-        condition
-
-
+        if hasattr(node, "_right_condition"):
+            condition: DataComponent = self.visit(node._right_condition)  # type: ignore[no-redef]
+            if condition is not None:
+                condition = condition.data[condition.data == True].index
+
+        if (
+            self.hr_agg_rules_computed is not None
+            and self.hr_input == "rule"
+            and node.value in self.hr_agg_rules_computed
+        ):
             df = self.hr_agg_rules_computed[node.value].copy()
             return Dataset(name=name, components=result_components, data=df)
 
         df = self.rule_data.copy()
         if condition is not None:
             df = df.loc[condition].reset_index(drop=True)
-
+
+        measure_name = self.ruleset_dataset.get_measures_names()[0]  # type: ignore[union-attr]
         if node.value in df[hr_component].values:
-            rest_identifiers = [
-
+            rest_identifiers = [
+                comp.name
+                for comp in result_components.values()
+                if comp.role == Role.IDENTIFIER and comp.name != hr_component
+            ]
             code_data = df[df[hr_component] == node.value].reset_index(drop=True)
-            code_data = code_data.merge(df[rest_identifiers], how=
+            code_data = code_data.merge(df[rest_identifiers], how="right", on=rest_identifiers)
             code_data = code_data.drop_duplicates().reset_index(drop=True)
 
             # If the value is in the dataset, we create a new row
             # based on the hierarchy mode
             # (Missing data points are considered,
             # lines 6483-6510 of the reference manual)
-            if self.ruleset_mode in (
+            if self.ruleset_mode in ("partial_null", "partial_zero"):
                 # We do not care about the presence of the leftCodeItem in Hierarchy Roll-up
                 if self.is_from_hr_agg and self.is_from_assignment:
                     pass
                 elif code_data[hr_component].isnull().any():
                     partial_is_valid = False
 
-            if self.ruleset_mode in (
+            if self.ruleset_mode in ("non_zero", "partial_zero", "always_zero"):
                 fill_indexes = code_data[code_data[hr_component].isnull()].index
                 code_data.loc[fill_indexes, measure_name] = 0
             code_data[hr_component] = node.value
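As the hunk above shows, the hierarchy mode decides how a missing code item is materialized: the *_zero modes substitute 0, the *_null modes substitute null. A toy sketch of that branch, with illustrative values:

import pandas as pd

measure_name = "OBS_VALUE"
mode = "partial_zero"

df = pd.DataFrame({measure_name: [None]})  # missing datapoint for a code item
if mode in ("non_zero", "partial_zero", "always_zero"):
    df[measure_name] = 0   # zero-family modes treat the gap as 0
else:  # non_null, partial_null and always_null
    df[measure_name] = None
print(df)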
@@ -1420,86 +1677,117 @@ class InterpreterAnalyzer(ASTTemplate):
             # based on the hierarchy mode
             # (Missing data points are considered,
             # lines 6483-6510 of the reference manual)
-            if self.ruleset_mode in (
+            if self.ruleset_mode in ("partial_null", "partial_zero"):
                 # We do not care about the presence of the leftCodeItem in Hierarchy Roll-up
                 if self.is_from_hr_agg and self.is_from_assignment:
                     pass
-                elif self.ruleset_mode ==
+                elif self.ruleset_mode == "partial_null":
                     partial_is_valid = False
             df = df.head(1)
             df[hr_component] = node.value
-            if self.ruleset_mode in (
+            if self.ruleset_mode in ("non_zero", "partial_zero", "always_zero"):
                 df[measure_name] = 0
             else:  # For non_null, partial_null and always_null
                 df[measure_name] = None
-        if self.ruleset_mode in (
+        if self.hr_partial_is_valid is not None and self.ruleset_mode in (
+            "partial_null",
+            "partial_zero",
+        ):
             self.hr_partial_is_valid.append(partial_is_valid)
         return Dataset(name=name, components=result_components, data=df)
 
-    def visit_UDOCall(self, node: AST.UDOCall) -> None:
+    def visit_UDOCall(self, node: AST.UDOCall) -> None:  # noqa: C901
         if self.udos is None:
             raise SemanticError("2-3-10", comp_type="User Defined Operators")
         elif node.op not in self.udos:
-            raise SemanticError("1-3-5", node_op=node.op, op_type=
-
-        signature_values = {}
+            raise SemanticError("1-3-5", node_op=node.op, op_type="User Defined Operator")
 
         operator = self.udos[node.op]
+        signature_values = {}
 
-        if operator
-
+        if operator is None:
+            raise SemanticError("1-3-5", node_op=node.op, op_type="User Defined Operator")
+        if operator["output"] == "Component" and not (
+            self.is_from_regular_aggregation or self.is_from_rule
+        ):
             raise SemanticError("1-3-29", op=node.op)
 
-        for i, param in enumerate(operator[
+        for i, param in enumerate(operator["params"]):
             if i >= len(node.params):
-                if
-                    value = self.visit(param[
-                    signature_values[param[
-
+                if "default" in param:
+                    value = self.visit(param["default"]).value
+                    signature_values[param["name"]] = Scalar(
+                        name=str(value), value=value, data_type=BASIC_TYPES[type(value)]
+                    )
                 else:
-                    raise SemanticError(
-
+                    raise SemanticError(
+                        "1-3-28",
+                        op=node.op,
+                        received=len(node.params),
+                        expected=len(operator["params"]),
+                    )
             else:
-                if isinstance(param[
-                    if param[
-                        signature_values[param[
-                    elif param[
+                if isinstance(param["type"], str):  # Scalar, Dataset, Component
+                    if param["type"] == "Scalar":
+                        signature_values[param["name"]] = self.visit(node.params[i])
+                    elif param["type"] in ["Dataset", "Component"]:
                         if isinstance(node.params[i], AST.VarID):
-                            signature_values[param[
+                            signature_values[param["name"]] = node.params[
+                                i
+                            ].value  # type: ignore[attr-defined]
                         else:
                             param_element = self.visit(node.params[i])
                             if isinstance(param_element, Dataset):
-                                if param[
-                                    raise SemanticError(
-
-
-
-
-
-
-
-
+                                if param["type"] == "Component":
+                                    raise SemanticError(
+                                        "1-4-1-1",
+                                        op=node.op,
+                                        option=param["name"],
+                                        type_1=param["type"],
+                                        type_2="Dataset",
+                                    )
+                            elif isinstance(param_element, Scalar) and param["type"] in [
+                                "Dataset",
+                                "Component",
+                            ]:
+                                raise SemanticError(
+                                    "1-4-1-1",
+                                    op=node.op,
+                                    option=param["name"],
+                                    type_1=param["type"],
+                                    type_2="Scalar",
+                                )
+                            signature_values[param["name"]] = param_element
 
                     else:
                         raise NotImplementedError
-                elif issubclass(param[
+                elif issubclass(param["type"], ScalarType):  # Basic types
                     # For basic Scalar types (Integer, Float, String, Boolean)
                     # We validate the type is correct and cast the value
                     param_element = self.visit(node.params[i])
                     if isinstance(param_element, (Dataset, DataComponent)):
-                        type_2 =
-                        raise SemanticError(
-
-
-
+                        type_2 = "Dataset" if isinstance(param_element, Dataset) else "Component"
+                        raise SemanticError(
+                            "1-4-1-1",
+                            op=node.op,
+                            option=param["name"],
+                            type_1=param["type"],
+                            type_2=type_2,
+                        )
+                    scalar_type = param["type"]
                     if not check_unary_implicit_promotion(param_element.data_type, scalar_type):
-                        raise SemanticError(
-
-
-
-
-
-
+                        raise SemanticError(
+                            "2-3-5",
+                            param_type=scalar_type,
+                            type_name=param_element.data_type,
+                            op=node.op,
+                            param_name=param["name"],
+                        )
+                    signature_values[param["name"]] = Scalar(
+                        name=param_element.name,
+                        value=scalar_type.cast(param_element.value),
+                        data_type=scalar_type,
+                    )
                 else:
                     raise NotImplementedError
 
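The default-parameter branch above fills a missing UDO argument from the operator signature and wraps it as a Scalar whose VTL type is inferred from the default's Python value. A minimal sketch of that binding, assuming (as in this codebase) that Scalar is importable from vtlengine.Model; the 0.5 default is hypothetical:

from vtlengine.DataTypes import BASIC_TYPES
from vtlengine.Model import Scalar

value = 0.5  # hypothetical default taken from the UDO signature
scalar = Scalar(name=str(value), value=value, data_type=BASIC_TYPES[type(value)])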
@@ -1511,17 +1799,22 @@ class InterpreterAnalyzer(ASTTemplate):
         self.udo_params.append(signature_values)
 
         # Calling the UDO AST, we use deepcopy to avoid changing the original UDO AST
-
+        if operator is not None:
+            result = self.visit(deepcopy(operator["expression"]))
 
         if self.is_from_regular_aggregation or self.is_from_rule:
-            result_type =
+            result_type = "Component" if isinstance(result, DataComponent) else "Scalar"
         else:
-            result_type =
+            result_type = "Scalar" if isinstance(result, Scalar) else "Dataset"
 
-        if result_type != operator[
-            raise SemanticError(
-
-
+        if result_type != operator["output"]:
+            raise SemanticError(
+                "1-4-1-1",
+                op=node.op,
+                option="output",
+                type_1=operator["output"],
+                type_2=result_type,
+            )
 
         # We pop the last element of the stack (current UDO params)
         # to avoid using them in the next UDO call
@@ -1535,5 +1828,6 @@ class InterpreterAnalyzer(ASTTemplate):
     def visit_TimeAggregation(self, node: AST.TimeAggregation) -> None:
         operand = self.visit(node.operand)
 
-        return Time_Aggregation.analyze(
-
+        return Time_Aggregation.analyze(
+            operand=operand, period_from=node.period_from, period_to=node.period_to, conf=node.conf
+        )