vtlengine-1.0-py3-none-any.whl → vtlengine-1.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +159 -102
- vtlengine/API/__init__.py +110 -68
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +402 -205
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +248 -104
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +24 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/Vtl.g4 +49 -20
- vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
- vtlengine/AST/Grammar/lexer.py +2012 -1312
- vtlengine/AST/Grammar/parser.py +7524 -4343
- vtlengine/AST/Grammar/tokens.py +140 -128
- vtlengine/AST/VtlVisitor.py +16 -5
- vtlengine/AST/__init__.py +41 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +196 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +96 -27
- vtlengine/Exceptions/messages.py +149 -69
- vtlengine/Interpreter/__init__.py +817 -497
- vtlengine/Model/__init__.py +172 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +167 -79
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +290 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +129 -46
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +467 -215
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +232 -41
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +79 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +48 -37
- vtlengine-1.0.2.dist-info/METADATA +245 -0
- vtlengine-1.0.2.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/LICENSE.md +0 -0
@@ -4,37 +4,88 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional, Union

 import vtlengine.AST as AST
+import vtlengine.Exceptions
 import vtlengine.Operators as Operators
 import pandas as pd
-from vtlengine.DataTypes import
-
+from vtlengine.DataTypes import (
+    BASIC_TYPES,
+    check_unary_implicit_promotion,
+    ScalarType,
+    Boolean,
+    SCALAR_TYPES_CLASS_REVERSE,
+)
 from vtlengine.Operators.Aggregation import extract_grouping_identifiers
 from vtlengine.Operators.Assignment import Assignment
 from vtlengine.Operators.CastOperator import Cast
 from vtlengine.Operators.Comparison import Between, ExistIn
-from vtlengine.Operators.Conditional import If
+from vtlengine.Operators.Conditional import If, Case
 from vtlengine.Operators.General import Eval
 from vtlengine.Operators.HROperators import get_measure_from_dataset, HAAssignment, Hierarchy
 from vtlengine.Operators.Numeric import Round, Trunc
 from vtlengine.Operators.String import Instr, Replace, Substr
 from vtlengine.Operators.Time import Fill_time_series, Time_Aggregation, Current_Date
 from vtlengine.Operators.Validation import Check, Check_Datapoint, Check_Hierarchy
-from vtlengine.Utils import
-
-
-
+from vtlengine.Utils import (
+    AGGREGATION_MAPPING,
+    ANALYTIC_MAPPING,
+    BINARY_MAPPING,
+    JOIN_MAPPING,
+    REGULAR_AGGREGATION_MAPPING,
+    ROLE_SETTER_MAPPING,
+    SET_MAPPING,
+    UNARY_MAPPING,
+    THEN_ELSE,
+    HR_UNARY_MAPPING,
+    HR_COMP_MAPPING,
+    HR_NUM_BINARY_MAPPING,
+)
+from vtlengine.files.output import save_datapoints
+from vtlengine.files.output._time_period_representation import TimePeriodRepresentation
 from vtlengine.files.parser import load_datapoints, _fill_dataset_empty_data

 from vtlengine.AST.ASTTemplate import ASTTemplate
-from vtlengine.AST.DAG import HRDAGAnalyzer
-from vtlengine.AST.
-
-
-
+from vtlengine.AST.DAG import HRDAGAnalyzer
+from vtlengine.AST.DAG._words import GLOBAL, DELETE, INSERT
+from vtlengine.AST.Grammar.tokens import (
+    AGGREGATE,
+    ALL,
+    APPLY,
+    AS,
+    BETWEEN,
+    CHECK_DATAPOINT,
+    DROP,
+    EXISTS_IN,
+    EXTERNAL,
+    FILTER,
+    HAVING,
+    INSTR,
+    KEEP,
+    MEMBERSHIP,
+    REPLACE,
+    ROUND,
+    SUBSTR,
+    TRUNC,
+    WHEN,
+    FILL_TIME_SERIES,
+    CAST,
+    CHECK_HIERARCHY,
+    HIERARCHY,
+    EQ,
+    CURRENT_DATE,
+    CALC,
+    COUNT,
+)
 from vtlengine.Exceptions import SemanticError
-from vtlengine.Model import
-
-
+from vtlengine.Model import (
+    DataComponent,
+    Dataset,
+    ExternalRoutine,
+    Role,
+    Scalar,
+    ScalarSet,
+    Component,
+    ValueDomain,
+)


 # noinspection PyTypeChecker
@@ -47,13 +98,13 @@ class InterpreterAnalyzer(ASTTemplate):
     # Analysis mode
     only_semantic: bool = False
     # Memory efficient
-    ds_analysis: Optional[
+    ds_analysis: Optional[Dict[str, Any]] = None
     datapoints_paths: Optional[Dict[str, Path]] = None
     output_path: Optional[Union[str, Path]] = None
     # Time Period Representation
     time_period_representation: Optional[TimePeriodRepresentation] = None
     # Flags to change behavior
-
+    nested_condition: Union[str, bool] = False
     is_from_assignment: bool = False
     is_from_component_assignment: bool = False
     is_from_regular_aggregation: bool = False
@@ -65,55 +116,60 @@ class InterpreterAnalyzer(ASTTemplate):
     is_from_condition: bool = False
     is_from_hr_val: bool = False
     is_from_hr_agg: bool = False
-
+    condition_stack: Optional[List[str]] = None
     # Handlers for simplicity
     regular_aggregation_dataset: Optional[Dataset] = None
     aggregation_grouping: Optional[List[str]] = None
     aggregation_dataset: Optional[Dataset] = None
-    then_condition_dataset: Optional[List[
-    else_condition_dataset: Optional[List[
+    then_condition_dataset: Optional[List[Any]] = None
+    else_condition_dataset: Optional[List[Any]] = None
     ruleset_dataset: Optional[Dataset] = None
     rule_data: Optional[pd.DataFrame] = None
-    ruleset_signature: Dict[str, str] = None
-    udo_params: List[Dict[str, Any]] = None
+    ruleset_signature: Optional[Dict[str, str]] = None
+    udo_params: Optional[List[Dict[str, Any]]] = None
     hr_agg_rules_computed: Optional[Dict[str, pd.DataFrame]] = None
     ruleset_mode: Optional[str] = None
     hr_input: Optional[str] = None
     hr_partial_is_valid: Optional[List[bool]] = None
     hr_condition: Optional[Dict[str, str]] = None
     # DL
-    dprs: Dict[str, Dict[str, Any]] = None
-    udos: Dict[str, Dict[str, Any]] = None
-    hrs: Dict[str, Dict[str, Any]] = None
+    dprs: Optional[Dict[str, Optional[Dict[str, Any]]]] = None
+    udos: Optional[Dict[str, Optional[Dict[str, Any]]]] = None
+    hrs: Optional[Dict[str, Optional[Dict[str, Any]]]] = None

     # **********************************
     # *                                *
     # *        Memory efficient        *
     # *                                *
     # **********************************
-    def _load_datapoints_efficient(self, statement_num: int):
+    def _load_datapoints_efficient(self, statement_num: int) -> None:
         if self.datapoints_paths is None:
             return
+        if self.ds_analysis is None:
+            return
         if statement_num not in self.ds_analysis[INSERT]:
             return
         for ds_name in self.ds_analysis[INSERT][statement_num]:
             if ds_name in self.datapoints_paths:
-                self.datasets[ds_name].data = load_datapoints(
-
-
+                self.datasets[ds_name].data = load_datapoints(
+                    self.datasets[ds_name].components, ds_name, self.datapoints_paths[ds_name]
+                )
             elif ds_name in self.datasets and self.datasets[ds_name].data is None:
                 _fill_dataset_empty_data(self.datasets[ds_name])

-    def _save_datapoints_efficient(self, statement_num: int):
+    def _save_datapoints_efficient(self, statement_num: int) -> None:
         if self.output_path is None:
             # Keeping the data in memory if no output path is provided
             return
+        if self.ds_analysis is None:
+            return
         if statement_num not in self.ds_analysis[DELETE]:
             return
         for ds_name in self.ds_analysis[DELETE][statement_num]:
-            if (
-
-
+            if (
+                ds_name not in self.datasets
+                or not isinstance(self.datasets[ds_name], Dataset)
+                or self.datasets[ds_name].data is None
             ):
                 continue
             if ds_name in self.ds_analysis[GLOBAL]:
@@ -122,9 +178,9 @@ class InterpreterAnalyzer(ASTTemplate):
                 continue

             # Saving only datasets, no scalars
-            save_datapoints(
-
-
+            save_datapoints(
+                self.time_period_representation, self.datasets[ds_name], self.output_path
+            )
             self.datasets[ds_name].data = None

     # **********************************
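The `_load_datapoints_efficient`/`_save_datapoints_efficient` helpers above implement a just-in-time lifecycle: the DAG analysis in `ds_analysis` maps each statement number to the datasets that must be loaded (INSERT) before the statement runs, and persisted and freed (DELETE) once no later statement needs them. A minimal standalone sketch of that cycle, with hypothetical `load`/`save` callables standing in for `load_datapoints`/`save_datapoints`:

    from typing import Any, Callable, Dict, List

    def run_statements(
        statements: List[Callable[[Dict[str, Any]], None]],
        analysis: Dict[str, Dict[int, List[str]]],  # mirrors ds_analysis: insert/delete per statement
        datasets: Dict[str, Any],
        load: Callable[[str], Any],
        save: Callable[[Any], None],
    ) -> None:
        for num, stmt in enumerate(statements, start=1):
            for name in analysis["insert"].get(num, []):
                datasets[name] = load(name)   # bring datapoints into memory just in time
            stmt(datasets)                    # execute the statement
            for name in analysis["delete"].get(num, []):
                save(datasets[name])          # persist the result to the output path
                datasets[name] = None         # and release the memory immediately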
@@ -142,6 +198,7 @@ class InterpreterAnalyzer(ASTTemplate):
         results = {}
         for child in node.children:
             if isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
+                vtlengine.Exceptions.dataset_output = child.left.value  # type: ignore[attr-defined]
             self._load_datapoints_efficient(statement_num)
             if not isinstance(child, (AST.HRuleset, AST.DPRuleset, AST.Operator)):
                 if not isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
@@ -150,15 +207,18 @@ class InterpreterAnalyzer(ASTTemplate):

             # Reset some handlers (joins and if)
             self.is_from_join = False
-            self.
+            self.condition_stack = None
             self.then_condition_dataset = None
             self.else_condition_dataset = None
-            self.
+            self.nested_condition = False

             if result is None:
                 continue
+
+            # Removing output dataset
+            vtlengine.Exceptions.dataset_output = None
             # Save results
-            self.datasets[result.name] = result
+            self.datasets[result.name] = copy(result)
             results[result.name] = result
             self._save_datapoints_efficient(statement_num)
             statement_num += 1
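Storing `copy(result)` instead of `result` snapshots the container, so in-place changes the interpreter later makes to the working object cannot rename or retype what was already registered under that name. A toy illustration of the difference (a shallow copy protects the object's own attributes, not shared nested data):

    from copy import copy
    from dataclasses import dataclass, field

    @dataclass
    class Result:
        name: str
        data: list = field(default_factory=list)

    saved = {}
    r = Result("DS_r", [1, 2])
    saved[r.name] = copy(r)              # snapshot the container
    r.name = "DS_r_modified"             # later mutation of the working object...
    assert saved["DS_r"].name == "DS_r"  # ...does not leak into the saved snapshot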
@@ -190,14 +250,14 @@ class InterpreterAnalyzer(ASTTemplate):
         if len(param_info) > 1:
             previous_default = param_info[0]
             for i in [1, len(param_info) - 1]:
-                if previous_default
+                if previous_default and not param_info[i]:
                     raise SemanticError("1-3-12")
                 previous_default = param_info[i]

         self.udos[node.op] = {
-
-
-
+            "params": param_info,
+            "expression": node.expression,
+            "output": node.output_type,
         }

     def visit_DPRuleset(self, node: AST.DPRuleset) -> None:
@@ -209,27 +269,35 @@ class InterpreterAnalyzer(ASTTemplate):
             raise SemanticError("1-4-1-7", type="Datapoint Ruleset", name=node.name)
         if len(rule_names) == 0:
             for i, rule in enumerate(node.rules):
-                rule.name = i + 1
+                rule.name = (i + 1).__str__()

         if len(rule_names) != len(set(rule_names)):
             not_unique = [name for name in rule_names if rule_names.count(name) > 1]
-            raise SemanticError(
-
-
+            raise SemanticError(
+                "1-4-1-5",
+                type="Datapoint Ruleset",
+                names=", ".join(not_unique),
+                ruleset_name=node.name,
+            )

         # Signature has the actual parameters names or aliases if provided
         signature_actual_names = {}
-
-
-
-
-
+        if not isinstance(node.params, AST.DefIdentifier):
+            for param in node.params:
+                if param.alias is not None:
+                    signature_actual_names[param.alias] = param.value
+                else:
+                    signature_actual_names[param.value] = param.value

         ruleset_data = {
-
-
-
-
+            "rules": node.rules,
+            "signature": signature_actual_names,
+            "params": (
+                [x.value for x in node.params]
+                if not isinstance(node.params, AST.DefIdentifier)
+                else []
+            ),
+            "signature_type": node.signature_type,
         }

         # Adding the ruleset to the dprs dictionary
@@ -252,7 +320,7 @@ class InterpreterAnalyzer(ASTTemplate):
             raise ValueError("All rules must have a name, or none of them")
         if len(rule_names) == 0:
             for i, rule in enumerate(node.rules):
-                rule.name = i + 1
+                rule.name = (i + 1).__str__()

         cond_comp = []
         if isinstance(node.element, list):
@@ -262,18 +330,21 @@ class InterpreterAnalyzer(ASTTemplate):
             signature_actual_name = node.element.value

         ruleset_data = {
-
-
+            "rules": node.rules,
+            "signature": signature_actual_name,
             "condition": cond_comp,
-
+            "node": node,
         }

         self.hrs[node.name] = ruleset_data

     # Execution Language
     def visit_Assignment(self, node: AST.Assignment) -> Any:
-        if
-
+        if (
+            self.is_from_join
+            and isinstance(node.left, AST.Identifier)
+            and node.left.kind == "ComponentID"
+        ):
             self.is_from_component_assignment = True
         self.is_from_assignment = True
         left_operand: str = self.visit(node.left)
@@ -285,32 +356,37 @@ class InterpreterAnalyzer(ASTTemplate):
     def visit_PersistentAssignment(self, node: AST.PersistentAssignment) -> Any:
         return self.visit_Assignment(node)

-    def visit_BinOp(self, node: AST.BinOp) ->
+    def visit_BinOp(self, node: AST.BinOp) -> Any:

         is_from_if = False
-        if
-
+        if (
+            not self.is_from_condition
+            and node.op != MEMBERSHIP
+            and self.condition_stack is not None
+            and len(self.condition_stack) > 0
+        ):
             is_from_if = self.is_from_if
             self.is_from_if = False

         if self.is_from_join and node.op in [MEMBERSHIP, AGGREGATE]:
-            if
-
-
-
-
-
-
-
-
+            if hasattr(node.left, "value") and hasattr(node.right, "value"):
+                if self.udo_params is not None and node.right.value in self.udo_params[-1]:
+                    comp_name = f"{node.left.value}#{self.udo_params[-1][node.right.value]}"
+                else:
+                    comp_name = f"{node.left.value}#{node.right.value}"
+                ast_var_id = AST.VarID(value=comp_name)
+                return self.visit(ast_var_id)
+        left_operand = self.visit(node.left)
+        right_operand = self.visit(node.right)
         if is_from_if:
             left_operand, right_operand = self.merge_then_else_datasets(left_operand, right_operand)
         if node.op == MEMBERSHIP:
-            if right_operand not in left_operand.components and
-                right_operand = right_operand.split(
+            if right_operand not in left_operand.components and "#" in right_operand:
+                right_operand = right_operand.split("#")[1]
             if self.is_from_component_assignment:
-                return BINARY_MAPPING[node.op].analyze(
-
+                return BINARY_MAPPING[node.op].analyze(
+                    left_operand, right_operand, self.is_from_component_assignment
+                )
             elif self.is_from_regular_aggregation:
                 raise SemanticError("1-1-6-6", dataset_name=left_operand, comp_name=right_operand)
             elif len(left_operand.get_identifiers()) == 0:
@@ -321,7 +397,11 @@ class InterpreterAnalyzer(ASTTemplate):
         operand = self.visit(node.operand)
         if node.op not in UNARY_MAPPING and node.op not in ROLE_SETTER_MAPPING:
             raise NotImplementedError
-        if
+        if (
+            self.is_from_regular_aggregation
+            and self.regular_aggregation_dataset is not None
+            and node.op in ROLE_SETTER_MAPPING
+        ):
             if self.regular_aggregation_dataset.data is None:
                 data_size = 0
             else:
@@ -335,9 +415,9 @@ class InterpreterAnalyzer(ASTTemplate):
             if node.operand is not None:
                 self.visit(node.operand)
             operand = self.aggregation_dataset
-        elif self.is_from_regular_aggregation:
+        elif self.is_from_regular_aggregation and self.regular_aggregation_dataset is not None:
             operand = self.regular_aggregation_dataset
-            if node.operand is not None:
+            if node.operand is not None and operand is not None:
                 op_comp: DataComponent = self.visit(node.operand)
                 comps_to_keep = {}
                 for comp_name, comp in self.regular_aggregation_dataset.components.items():
@@ -347,16 +427,14 @@ class InterpreterAnalyzer(ASTTemplate):
                         name=op_comp.name,
                         data_type=op_comp.data_type,
                         role=op_comp.role,
-                        nullable=op_comp.nullable
+                        nullable=op_comp.nullable,
                     )
                 if operand.data is not None:
                     data_to_keep = operand.data[operand.get_identifiers_names()]
                     data_to_keep[op_comp.name] = op_comp.data
                 else:
                     data_to_keep = None
-                operand = Dataset(name=operand.name,
-                                  components=comps_to_keep,
-                                  data=data_to_keep)
+                operand = Dataset(name=operand.name, components=comps_to_keep, data=data_to_keep)
             else:
                 operand = self.visit(node.operand)

@@ -370,38 +448,42 @@ class InterpreterAnalyzer(ASTTemplate):
         if node.having_clause is not None and node.grouping is None:
             raise SemanticError("1-3-33")

-        groupings = []
+        groupings: Any = []
         having = None
         grouping_op = node.grouping_op
         if node.grouping is not None:
-            if grouping_op ==
+            if grouping_op == "group all":
                 if self.only_semantic:
                     data = None
                 else:
-                    data = operand.data
-                self.aggregation_dataset = Dataset(
-
-
+                    data = copy(operand.data)
+                self.aggregation_dataset = Dataset(
+                    name=operand.name, components=operand.components, data=data
+                )
             # For Component handling in operators like time_agg
             self.is_from_grouping = True
             for x in node.grouping:
                 groupings.append(self.visit(x))
             self.is_from_grouping = False
-            if grouping_op ==
+            if grouping_op == "group all":
                 comp_grouped = groupings[0]
-                if
+                if (
+                    operand.data is not None
+                    and comp_grouped.data is not None
+                    and len(comp_grouped.data) > 0
+                ):
                     operand.data[comp_grouped.name] = comp_grouped.data
                 groupings = [comp_grouped.name]
                 self.aggregation_dataset = None
         if node.having_clause is not None:
-            self.aggregation_dataset = Dataset(
-
-
-
+            self.aggregation_dataset = Dataset(
+                name=operand.name,
+                components=deepcopy(operand.components),
+                data=pd.DataFrame(columns=operand.get_components_names()),
+            )
             self.aggregation_grouping = extract_grouping_identifiers(
-                operand.get_identifiers_names(),
-
-                groupings)
+                operand.get_identifiers_names(), node.grouping_op, groupings
+            )
             self.is_from_having = True
             # Empty data analysis on having - we do not care about the result
             self.visit(node.having_clause)
@@ -409,17 +491,17 @@ class InterpreterAnalyzer(ASTTemplate):
             self.is_from_having = False
             self.aggregation_grouping = None
             self.aggregation_dataset = None
-            having = getattr(node.having_clause,
+            having = getattr(node.having_clause, "expr", "")
             having = self._format_having_expression_udo(having)

         elif self.is_from_having:
             groupings = self.aggregation_grouping
             # Setting here group by as we have already selected the identifiers we need
-            grouping_op =
+            grouping_op = "group by"

         return AGGREGATION_MAPPING[node.op].analyze(operand, grouping_op, groupings, having)

-    def _format_having_expression_udo(self, having: str):
+    def _format_having_expression_udo(self, having: str) -> str:
         if self.udo_params is None:
             return having
         for k, v in self.udo_params[-1].items():
@@ -438,31 +520,36 @@ class InterpreterAnalyzer(ASTTemplate):
             having = having.replace(old_param, new_param)
         return having

-    def visit_Analytic(self, node: AST.Analytic) ->
+    def visit_Analytic(self, node: AST.Analytic) -> Any:  # noqa: C901
+        component_name = None
         if self.is_from_regular_aggregation:
+            if self.regular_aggregation_dataset is None:
+                raise SemanticError("1-1-6-10")
             if node.operand is None:
                 operand = self.regular_aggregation_dataset
             else:
                 operand_comp = self.visit(node.operand)
+                component_name = operand_comp.name
                 measure_names = self.regular_aggregation_dataset.get_measures_names()
                 dataset_components = self.regular_aggregation_dataset.components.copy()
                 for name in measure_names:
                     if name != operand_comp.name:
                         dataset_components.pop(name)

-                if self.only_semantic:
+                if self.only_semantic or self.regular_aggregation_dataset.data is None:
                     data = None
                 else:
-                    data = self.regular_aggregation_dataset.data[
-                        dataset_components.keys()]
+                    data = self.regular_aggregation_dataset.data[dataset_components.keys()]

-                operand = Dataset(
-
-
+                operand = Dataset(
+                    name=self.regular_aggregation_dataset.name,
+                    components=dataset_components,
+                    data=data,
+                )

         else:
-            operand
-        partitioning = []
+            operand = self.visit(node.operand)
+        partitioning: Any = []
         ordering = []
         if self.udo_params is not None:
             if node.partition_by is not None:
@@ -470,15 +557,23 @@ class InterpreterAnalyzer(ASTTemplate):
                     if comp_name in self.udo_params[-1]:
                         partitioning.append(self.udo_params[-1][comp_name])
                     else:
-                        raise SemanticError(
-
+                        raise SemanticError(
+                            "2-3-9",
+                            comp_type="Component",
+                            comp_name=comp_name,
+                            param="UDO parameters",
+                        )
             if node.order_by is not None:
                 for o in node.order_by:
                     if o.component in self.udo_params[-1]:
                         o.component = self.udo_params[-1][o.component]
                     else:
-                        raise SemanticError(
-
+                        raise SemanticError(
+                            "2-3-9",
+                            comp_type="Component",
+                            comp_name=o.component,
+                            param="UDO parameters",
+                        )
                 ordering = node.order_by

         else:
@@ -487,7 +582,9 @@ class InterpreterAnalyzer(ASTTemplate):
         if not isinstance(operand, Dataset):
             raise SemanticError("2-3-4", op=node.op, comp="dataset")
         if node.partition_by is None:
-            order_components =
+            order_components = (
+                [x.component for x in node.order_by] if node.order_by is not None else []
+            )
             partitioning = [x for x in operand.get_identifiers_names() if x not in order_components]

         params = []
@@ -498,38 +595,56 @@ class InterpreterAnalyzer(ASTTemplate):
         else:
             params.append(param)

-        result = ANALYTIC_MAPPING[node.op].analyze(
-
-
-
-
+        result = ANALYTIC_MAPPING[node.op].analyze(
+            operand=operand,
+            partitioning=partitioning,
+            ordering=ordering,
+            window=node.window,
+            params=params,
+            component_name=component_name,
+        )
         if not self.is_from_regular_aggregation:
             return result

         # Extracting the components we need (only identifiers)
-        id_columns =
+        id_columns = (
+            self.regular_aggregation_dataset.get_identifiers_names()
+            if (self.regular_aggregation_dataset is not None)
+            else None
+        )

         # # Extracting the component we need (only measure)
-
+        if component_name is None or node.op == COUNT:
+            measure_name = result.get_measures_names()[0]
+        else:
+            measure_name = component_name
         # Joining the result with the original dataset
         if self.only_semantic:
             data = None
         else:
-
-            self.regular_aggregation_dataset
-
-
-
-
-
-
-
-
-
-
-
-
-
+            if (
+                self.regular_aggregation_dataset is not None
+                and self.regular_aggregation_dataset.data is not None
+            ):
+                joined_result = pd.merge(
+                    self.regular_aggregation_dataset.data[id_columns],
+                    result.data,
+                    on=id_columns,
+                    how="inner",
+                )
+                data = joined_result[measure_name]
+            else:
+                data = None
+
+        return DataComponent(
+            name=measure_name,
+            data=data,
+            data_type=result.components[measure_name].data_type,
+            role=result.components[measure_name].role,
+            nullable=result.components[measure_name].nullable,
+        )
+
+    def visit_MulOp(self, node: AST.MulOp) -> None:
         """
         MulOp: (op, children)

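The rewritten tail of `visit_Analytic` merges the analytic result back onto the calling dataset's identifier columns and hands back only the affected measure as a `DataComponent`. The same pandas pattern in isolation, on toy frames rather than vtlengine objects:

    import pandas as pd

    source = pd.DataFrame({"Id_1": [1, 2, 3], "Me_1": [10.0, 20.0, 30.0]})
    # Analytic output: one row per identifier, carrying the computed measure.
    analytic = pd.DataFrame({"Id_1": [1, 2, 3], "Me_1": [60.0, 60.0, 60.0]})

    id_columns = ["Id_1"]
    joined = pd.merge(source[id_columns], analytic, on=id_columns, how="inner")
    data = joined["Me_1"]  # this single column becomes the DataComponent's data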
@@ -583,9 +698,9 @@ class InterpreterAnalyzer(ASTTemplate):
             return Current_Date.analyze()

         else:
-            raise SemanticError("1-3-5", op_type=
+            raise SemanticError("1-3-5", op_type="MulOp", node_op=node.op)

-    def visit_VarID(self, node: AST.VarID) -> Any:
+    def visit_VarID(self, node: AST.VarID) -> Any:  # noqa: C901
         if self.is_from_assignment:
             return node.value
         # Having takes precedence as it is lower in the AST
@@ -595,83 +710,102 @@ class InterpreterAnalyzer(ASTTemplate):
                 return udo_element
             # If it is only the component or dataset name, we rename the node.value
             node.value = udo_element
-        if self.is_from_having or self.is_from_grouping:
+        if self.aggregation_dataset is not None and (self.is_from_having or self.is_from_grouping):
             if node.value not in self.aggregation_dataset.components:
-                raise SemanticError(
-
+                raise SemanticError(
+                    "1-1-1-10",
+                    op=None,
+                    comp_name=node.value,
+                    dataset_name=self.aggregation_dataset.name,
+                )
             if self.aggregation_dataset.data is None:
                 data = None
             else:
                 data = self.aggregation_dataset.data[node.value]
-            return DataComponent(
-
-
-
-
-
+            return DataComponent(
+                name=node.value,
+                data=data,
+                data_type=self.aggregation_dataset.components[node.value].data_type,
+                role=self.aggregation_dataset.components[node.value].role,
+                nullable=self.aggregation_dataset.components[node.value].nullable,
+            )
         if self.is_from_regular_aggregation:
             if self.is_from_join and node.value in self.datasets.keys():
                 return self.datasets[node.value]
-            if
-                if node.value in self.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            if self.regular_aggregation_dataset is not None:
+                if node.value in self.datasets and isinstance(self.datasets[node.value], Scalar):
+                    if node.value in self.regular_aggregation_dataset.components:
+                        raise SemanticError("1-1-6-11", comp_name=node.value)
+                    return self.datasets[node.value]
+                if self.regular_aggregation_dataset.data is not None:
+                    if (
+                        self.is_from_join
+                        and node.value
+                        not in self.regular_aggregation_dataset.get_components_names()
+                    ):
+                        is_partial_present = 0
+                        found_comp = None
+                        for comp_name in self.regular_aggregation_dataset.get_components_names():
+                            if "#" in comp_name and comp_name.split("#")[1] == node.value:
+                                is_partial_present += 1
+                                found_comp = comp_name
+                            elif "#" in node.value and node.value.split("#")[1] == comp_name:
+                                is_partial_present += 1
+                                found_comp = comp_name
+                        if is_partial_present == 0:
+                            raise SemanticError(
+                                "1-1-1-10",
+                                comp_name=node.value,
+                                dataset_name=self.regular_aggregation_dataset.name,
+                            )
+                        elif is_partial_present == 2:
+                            raise SemanticError("1-1-13-9", comp_name=node.value)
+                        node.value = found_comp
+                    if node.value not in self.regular_aggregation_dataset.components:
+                        raise SemanticError(
+                            "1-1-1-10",
+                            comp_name=node.value,
+                            dataset_name=self.regular_aggregation_dataset.name,
+                        )
+                    data = self.regular_aggregation_dataset.data[node.value]
+                else:
+                    data = None
+                return DataComponent(
+                    name=node.value,
+                    data=data,
+                    data_type=self.regular_aggregation_dataset.components[node.value].data_type,
+                    role=self.regular_aggregation_dataset.components[node.value].role,
+                    nullable=self.regular_aggregation_dataset.components[node.value].nullable,
+                )
+        if (
+            self.is_from_rule
+            and self.ruleset_dataset is not None
+            and self.ruleset_signature is not None
+        ):
             if node.value not in self.ruleset_signature:
                 raise SemanticError("1-1-10-7", comp_name=node.value)
             comp_name = self.ruleset_signature[node.value]
             if comp_name not in self.ruleset_dataset.components:
-                raise SemanticError(
-
+                raise SemanticError(
+                    "1-1-1-10", comp_name=node.value, dataset_name=self.ruleset_dataset.name
+                )
             if self.rule_data is None:
                 data = None
             else:
                 data = self.rule_data[comp_name]
-            return DataComponent(
-
-
-
-
+            return DataComponent(
+                name=comp_name,
+                data=data,
+                data_type=self.ruleset_dataset.components[comp_name].data_type,
+                role=self.ruleset_dataset.components[comp_name].role,
+                nullable=self.ruleset_dataset.components[comp_name].nullable,
+            )
         if node.value not in self.datasets:
             raise SemanticError("2-3-6", dataset_name=node.value)
         return self.datasets[node.value]

     def visit_Collection(self, node: AST.Collection) -> Any:
-        if node.kind ==
+        if node.kind == "Set":
             elements = []
             duplicates = []
             for child in node.children:
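Inside joins, components are keyed as `dataset#component`; the new `visit_VarID` branch above counts partial matches so that an unqualified name resolves only when it is unambiguous. A simplified standalone version of that rule (the real code also matches in the reverse, qualified-to-bare direction):

    from typing import List

    def resolve(name: str, columns: List[str]) -> str:
        matches = [c for c in columns if "#" in c and c.split("#")[1] == name]
        if not matches:
            raise KeyError(f"{name!r} is not present in the join result")
        if len(matches) > 1:
            raise KeyError(f"{name!r} is ambiguous between {matches}")
        return matches[0]

    assert resolve("Me_1", ["DS_1#Me_1", "DS_2#Me_2"]) == "DS_1#Me_1"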
@@ -685,14 +819,14 @@ class InterpreterAnalyzer(ASTTemplate):
             if len(duplicates) > 0:
                 raise SemanticError("1-3-9", duplicates=duplicates)
             for element in elements:
-                if type(element)
+                if type(element) is not type(elements[0]):
                     raise Exception("All elements in a set must be of the same type")
             if len(elements) == 0:
                 raise Exception("A set must contain at least one element")
             if len(elements) != len(set(elements)):
                 raise Exception("A set must not contain duplicates")
             return ScalarSet(data_type=BASIC_TYPES[type(elements[0])], values=elements)
-        elif node.kind ==
+        elif node.kind == "ValueDomain":
             if self.value_domains is None:
                 raise SemanticError("2-3-10", comp_type="Value Domains")
             if node.name not in self.value_domains:
@@ -702,7 +836,7 @@ class InterpreterAnalyzer(ASTTemplate):
         else:
             raise SemanticError("1-3-26", name=node.name)

-    def visit_RegularAggregation(self, node: AST.RegularAggregation) -> None:
+    def visit_RegularAggregation(self, node: AST.RegularAggregation) -> None:  # noqa: C901
         operands = []
         dataset = self.visit(node.dataset)
         if isinstance(dataset, Scalar):
@@ -720,11 +854,19 @@ class InterpreterAnalyzer(ASTTemplate):
             raise SemanticError("1-3-35", op=node.op)
         if node.op == AGGREGATE:
             # Extracting the role encoded inside the children assignments
-            role_info = {
+            role_info = {
+                child.left.value: child.left.role
+                for child in node.children
+                if hasattr(child, "left")
+            }
             dataset = copy(operands[0])
-
-
-
+            if self.regular_aggregation_dataset is not None:
+                dataset.name = self.regular_aggregation_dataset.name
+            dataset.components = {
+                comp_name: comp
+                for comp_name, comp in dataset.components.items()
+                if comp.role != Role.MEASURE
+            }
             if dataset.data is not None:
                 dataset.data = dataset.data[dataset.get_identifiers_names()]
             aux_operands = []
@@ -733,8 +875,7 @@ class InterpreterAnalyzer(ASTTemplate):
                 data = operand.data[measure.name] if operand.data is not None else None
                 # Getting role from encoded information
                 # (handling also UDO params as it is present in the value of the mapping)
-                if
-                        operand.name in self.udo_params[-1].values()):
+                if self.udo_params is not None and operand.name in self.udo_params[-1].values():
                     role = None
                     for k, v in self.udo_params[-1].items():
                         if isinstance(v, str) and v == operand.name:
@@ -742,41 +883,65 @@ class InterpreterAnalyzer(ASTTemplate):
                             role = role_info[role_key]
                 else:
                     role = role_info[operand.name]
-                aux_operands.append(
-
-
-
-
+                aux_operands.append(
+                    DataComponent(
+                        name=operand.name,
+                        data=data,
+                        data_type=measure.data_type,
+                        role=role if role is not None else measure.role,
+                        nullable=measure.nullable,
+                    )
+                )
             operands = aux_operands
             self.regular_aggregation_dataset = None
         if node.op == FILTER:
-            if not isinstance(operands[0], DataComponent):
+            if not isinstance(operands[0], DataComponent) and hasattr(child, "left"):
                 measure = child.left.value
-                operands[0] = DataComponent(
-
-
-
-
+                operands[0] = DataComponent(
+                    name=measure,
+                    data=operands[0].data[measure],
+                    data_type=operands[0].components[measure].data_type,
+                    role=operands[0].components[measure].role,
+                    nullable=operands[0].components[measure].nullable,
+                )
             return REGULAR_AGGREGATION_MAPPING[node.op].analyze(operands[0], dataset)
         if self.is_from_join:
             if node.op in [DROP, KEEP]:
-                operands = [
-
-
-
-
-
+                operands = [
+                    (
+                        operand.get_measures_names()
+                        if isinstance(operand, Dataset)
+                        else (
+                            operand.name
+                            if isinstance(operand, DataComponent)
+                            and operand.role is not Role.IDENTIFIER
+                            else operand
+                        )
+                    )
+                    for operand in operands
+                ]
+                operands = list(
+                    set(
+                        [
+                            item
+                            for sublist in operands
+                            for item in (sublist if isinstance(sublist, list) else [sublist])
+                        ]
+                    )
+                )
             result = REGULAR_AGGREGATION_MAPPING[node.op].analyze(operands, dataset)
             if node.isLast:
                 if result.data is not None:
                     result.data.rename(
-                        columns={col: col[col.find(
-                        inplace=True
-
-
-
+                        columns={col: col[col.find("#") + 1 :] for col in result.data.columns},
+                        inplace=True,
+                    )
+                result.components = {
+                    comp_name[comp_name.find("#") + 1 :]: comp
+                    for comp_name, comp in result.components.items()
+                }
                 for comp in result.components.values():
-                    comp.name = comp.name[comp.name.find(
+                    comp.name = comp.name[comp.name.find("#") + 1 :]
                 if result.data is not None:
                     result.data.reset_index(drop=True, inplace=True)
                 self.is_from_join = False
@@ -793,8 +958,9 @@ class InterpreterAnalyzer(ASTTemplate):
             thenValue = self.visit(node.thenOp)
             elseValue = self.visit(node.elseOp)
             if not isinstance(thenValue, Scalar) or not isinstance(elseValue, Scalar):
-                raise SemanticError(
-
+                raise SemanticError(
+                    "1-1-9-3", op="If_op", then_name=thenValue.name, else_name=elseValue.name
+                )
             if condition.value:
                 return self.visit(node.thenOp)
             else:
@@ -802,59 +968,81 @@ class InterpreterAnalyzer(ASTTemplate):

         # Analysis for data component and dataset
         else:
-            if self.
-                self.
+            if self.condition_stack is None:
+                self.condition_stack = []
             if self.then_condition_dataset is None:
                 self.then_condition_dataset = []
             if self.else_condition_dataset is None:
                 self.else_condition_dataset = []
             self.generate_then_else_datasets(copy(condition))

-            self.
+            self.condition_stack.append(THEN_ELSE["then"])
             self.is_from_if = True
-            self.
+            self.nested_condition = "T" if isinstance(node.thenOp, AST.If) else False
             thenOp = self.visit(node.thenOp)
             if isinstance(thenOp, Scalar) or not isinstance(node.thenOp, AST.BinOp):
                 self.then_condition_dataset.pop()
-                self.
+                self.condition_stack.pop()

-            self.
+            self.condition_stack.append(THEN_ELSE["else"])
             self.is_from_if = True
-            self.
+            self.nested_condition = "E" if isinstance(node.elseOp, AST.If) else False
             elseOp = self.visit(node.elseOp)
             if isinstance(elseOp, Scalar) or (
-
+                not isinstance(node.elseOp, AST.BinOp) and not isinstance(node.elseOp, AST.If)
+            ):
                 if len(self.else_condition_dataset) > 0:
                     self.else_condition_dataset.pop()
-                if len(self.
-                    self.
+                if len(self.condition_stack) > 0:
+                    self.condition_stack.pop()

         return If.analyze(condition, thenOp, elseOp)

+    def visit_Case(self, node: AST.Case) -> Any:
+        conditions: List[Any] = []
+        thenOps: List[Any] = []
+
+        if self.condition_stack is None:
+            self.condition_stack = []
+
+        while node.cases:
+            case = node.cases.pop(0)
+            self.is_from_condition = True
+            conditions.append(self.visit(case.condition))
+            self.is_from_condition = False
+            thenOps.append(self.visit(case.thenOp))
+
+        return Case.analyze(conditions, thenOps, self.visit(node.elseOp))
+
     def visit_RenameNode(self, node: AST.RenameNode) -> Any:
         if self.udo_params is not None:
             if "#" in node.old_name:
-                if node.old_name.split(
-                    comp_name = self.udo_params[-1][node.old_name.split(
+                if node.old_name.split("#")[1] in self.udo_params[-1]:
+                    comp_name = self.udo_params[-1][node.old_name.split("#")[1]]
                     node.old_name = f"{node.old_name.split('#')[0]}#{comp_name}"
             else:
                 if node.old_name in self.udo_params[-1]:
                     node.old_name = self.udo_params[-1][node.old_name]

-        if
-
+        if (
+            self.is_from_join
+            and self.regular_aggregation_dataset is not None
+            and node.old_name not in self.regular_aggregation_dataset.components
+        ):
+            node.old_name = node.old_name.split("#")[1]

         return node

     def visit_Constant(self, node: AST.Constant) -> Any:
-        return Scalar(
-
+        return Scalar(
+            name=str(node.value), value=node.value, data_type=BASIC_TYPES[type(node.value)]
+        )

     def visit_JoinOp(self, node: AST.JoinOp) -> None:
         clause_elements = []
         for clause in node.clauses:
             clause_elements.append(self.visit(clause))
-            if hasattr(clause,
+            if hasattr(clause, "op") and clause.op == AS:
                 # TODO: We need to delete somewhere the join datasets with alias that are added here
                 self.datasets[clause_elements[-1].name] = clause_elements[-1]

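The new `visit_Case` evaluates each `(condition, thenOp)` pair in order and delegates to `Case.analyze` with the `elseOp` as fallback, i.e. first-match-wins selection. The same semantics shown with `numpy.select` on plain arrays (an analogy, not the `Case` operator itself):

    import numpy as np

    me_1 = np.array([-2, 0, 5])
    conditions = [me_1 < 0, me_1 == 0]  # checked in order, like the case branches
    choices = ["negative", "zero"]      # the matching thenOp values
    result = np.select(conditions, choices, default="positive")  # elseOp fallback
    print(result)  # ['negative' 'zero' 'positive']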
@@ -865,7 +1053,7 @@ class InterpreterAnalyzer(ASTTemplate):
     def visit_ParamConstant(self, node: AST.ParamConstant) -> str:
         return node.value

-    def visit_ParamOp(self, node: AST.ParamOp) -> None:
+    def visit_ParamOp(self, node: AST.ParamOp) -> None:  # noqa: C901
         if node.op == ROUND:
             op_element = self.visit(node.children[0])
             if len(node.params) != 0:
@@ -899,26 +1087,34 @@ class InterpreterAnalyzer(ASTTemplate):
             else:
                 raise NotImplementedError
         elif node.op == HAVING:
-
-
-
-
-
-
-
-
-
-
-
-
-
+            if self.aggregation_dataset is not None and self.aggregation_grouping is not None:
+                for id_name in self.aggregation_grouping:
+                    if id_name not in self.aggregation_dataset.components:
+                        raise SemanticError("1-1-2-4", op=node.op, id_name=id_name)
+                if len(self.aggregation_dataset.get_measures()) != 1:
+                    raise ValueError("Only one measure is allowed")
+                # Deepcopy is necessary for components to avoid changing the original dataset
+                self.aggregation_dataset.components = {
+                    comp_name: deepcopy(comp)
+                    for comp_name, comp in self.aggregation_dataset.components.items()
+                    if comp_name in self.aggregation_grouping or comp.role == Role.MEASURE
+                }
+
+                self.aggregation_dataset.data = (
+                    self.aggregation_dataset.data[
+                        self.aggregation_dataset.get_identifiers_names()
+                        + self.aggregation_dataset.get_measures_names()
+                    ]
+                    if (self.aggregation_dataset.data is not None)
+                    else None
+                )
             result = self.visit(node.params)
             measure = result.get_measures()[0]
             if measure.data_type != Boolean:
                 raise SemanticError("1-1-2-3", type=SCALAR_TYPES_CLASS_REVERSE[Boolean])
             return None
         elif node.op == FILL_TIME_SERIES:
-            mode = self.visit(node.params[0]) if len(node.params) == 1 else
+            mode = self.visit(node.params[0]) if len(node.params) == 1 else "all"
             return Fill_time_series.analyze(self.visit(node.children[0]), mode)
         elif node.op == CAST:
             operand = self.visit(node.children[0])
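The HAVING branch deep-copies component objects before trimming them, because components are shared by reference with the source dataset and the trimmed copy is about to be mutated. A toy demonstration of why the shallow alternative would corrupt the source:

    from copy import deepcopy

    source_components = {"Id_1": {"role": "Identifier"}, "Me_1": {"role": "Measure"}}
    trimmed = {name: deepcopy(comp) for name, comp in source_components.items()}
    trimmed["Me_1"]["role"] = "Attribute"  # safe: only the deep copy changes
    assert source_components["Me_1"]["role"] == "Measure"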
@@ -932,7 +1128,7 @@ class InterpreterAnalyzer(ASTTemplate):
             if self.dprs is None:
                 raise SemanticError("1-3-19", node_type="Datapoint Rulesets", node_value="")
             # Checking if ruleset exists
-            dpr_name = node.children[1]
+            dpr_name: Any = node.children[1]
             if dpr_name not in self.dprs:
                 raise SemanticError("1-3-19", node_type="Datapoint Ruleset", node_value=dpr_name)
             dpr_info = self.dprs[dpr_name]
@@ -944,37 +1140,44 @@ class InterpreterAnalyzer(ASTTemplate):
             # Checking if list of components supplied is valid
             if len(node.children) > 2:
                 for comp_name in node.children[2:]:
-                    if comp_name not in dataset_element.components:
-                        raise SemanticError(
-
-
+                    if comp_name.__str__() not in dataset_element.components:
+                        raise SemanticError(
+                            "1-1-1-10", comp_name=comp_name, dataset_name=dataset_element.name
+                        )
+                if dpr_info is not None and dpr_info["signature_type"] == "variable":
                     for i, comp_name in enumerate(node.children[2:]):
-                        if comp_name != dpr_info[
-                            raise SemanticError(
-
-
-
-
+                        if comp_name != dpr_info["params"][i]:
+                            raise SemanticError(
+                                "1-1-10-3",
+                                op=node.op,
+                                expected=dpr_info["params"][i],
+                                found=comp_name,
+                            )
+
+            output: Any = node.params[0]  # invalid, all_measures, all
+            if dpr_info is None:
+                dpr_info = {}

             rule_output_values = {}
             self.ruleset_dataset = dataset_element
-            self.ruleset_signature = dpr_info[
+            self.ruleset_signature = dpr_info["signature"]
             self.ruleset_mode = output
             # Gather rule data, adding the ruleset dataset to the interpreter
-
-
-
-
-
-
+            if dpr_info is not None:
+                for rule in dpr_info["rules"]:
+                    rule_output_values[rule.name] = {
+                        "errorcode": rule.erCode,
+                        "errorlevel": rule.erLevel,
+                        "output": self.visit(rule),
+                    }
             self.ruleset_mode = None
             self.ruleset_signature = None
             self.ruleset_dataset = None

             # Datapoint Ruleset final evaluation
-            return Check_Datapoint.analyze(
-
-
+            return Check_Datapoint.analyze(
+                dataset_element=dataset_element, rule_info=rule_output_values, output=output
+            )
         elif node.op in (CHECK_HIERARCHY, HIERARCHY):
             if len(node.children) == 3:
                 dataset, component, hr_name = (self.visit(x) for x in node.children)
@@ -993,142 +1196,161 @@ class InterpreterAnalyzer(ASTTemplate):

             if self.hrs is None:
                 raise SemanticError("1-3-19", node_type="Hierarchical Rulesets", node_value="")
-            if hr_name not in self.hrs:
-                raise SemanticError("1-3-19", node_type="Hierarchical Ruleset", node_value=hr_name)
-
-            if not isinstance(dataset, Dataset):
-                raise SemanticError("1-1-1-20", op=node.op)
-
-            hr_info = self.hrs[hr_name]
-
-            if len(cond_components) != len(hr_info['condition']):
-                raise SemanticError("1-1-10-2", op=node.op)
-
-            if hr_info['node'].signature_type == 'variable' and hr_info['signature'] != component:
-                raise SemanticError("1-1-10-3", op=node.op,
-                                    found=component,
-                                    expected=hr_info['signature'])
-            elif hr_info['node'].signature_type == 'valuedomain' and component is None:
-                raise SemanticError("1-1-10-4", op=node.op)
-
-            cond_info = {}
-            for i, cond_comp in enumerate(hr_info['condition']):
-                if hr_info['node'].signature_type == 'variable' and cond_components[i] != cond_comp:
-                    raise SemanticError("1-1-10-6", op=node.op,
-                                        expected=cond_comp, found=cond_components[i])
-                cond_info[cond_comp] = cond_components[i]
-
-            if node.op == HIERARCHY:
-                aux = []
-                for rule in hr_info['rules']:
-                    if rule.rule.op == EQ:
-                        aux.append(rule)
-                    elif rule.rule.op == WHEN:
-                        if rule.rule.right.op == EQ:
-                            aux.append(rule)
-                # Filter only the rules with HRBinOP as =,
-                # as they are the ones that will be computed
-                if len(aux) == 0:
-                    raise SemanticError("1-1-10-5")
-                hr_info['rules'] = aux
-
-                hierarchy_ast = AST.HRuleset(name=hr_name,
-                                             signature_type=hr_info['node'].signature_type,
-                                             element=hr_info['node'].element, rules=aux)
-                HRDAGAnalyzer().visit(hierarchy_ast)
-
-            Check_Hierarchy.validate_hr_dataset(dataset, component)
-
-            # Gather rule data, adding the necessary elements to the interpreter
-            # for simplicity
-            self.ruleset_dataset = dataset
-            self.ruleset_signature = {**{"RULE_COMPONENT": component}, **cond_info}
-            self.ruleset_mode = mode
-            self.hr_input = input_
-            rule_output_values = {}
-            if node.op == HIERARCHY:
-                self.is_from_hr_agg = True
-                self.hr_agg_rules_computed = {}
-                for rule in hr_info['rules']:
-                    self.visit(rule)
-                self.is_from_hr_agg = False
             else:
-                self.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                if hr_name not in self.hrs:
+                    raise SemanticError(
+                        "1-3-19", node_type="Hierarchical Ruleset", node_value=hr_name
+                    )
+
+                if not isinstance(dataset, Dataset):
+                    raise SemanticError("1-1-1-20", op=node.op)
+
+                hr_info = self.hrs[hr_name]
+                if hr_info is not None:
+                    if len(cond_components) != len(hr_info["condition"]):
+                        raise SemanticError("1-1-10-2", op=node.op)
+
+                    if (
+                        hr_info["node"].signature_type == "variable"
+                        and hr_info["signature"] != component
+                    ):
+                        raise SemanticError(
+                            "1-1-10-3", op=node.op, found=component, expected=hr_info["signature"]
+                        )
+                    elif hr_info["node"].signature_type == "valuedomain" and component is None:
+                        raise SemanticError("1-1-10-4", op=node.op)
+
+                    cond_info = {}
+                    for i, cond_comp in enumerate(hr_info["condition"]):
+                        if (
+                            hr_info["node"].signature_type == "variable"
+                            and cond_components[i] != cond_comp
+                        ):
+                            raise SemanticError(
+                                "1-1-10-6", op=node.op, expected=cond_comp, found=cond_components[i]
+                            )
+                        cond_info[cond_comp] = cond_components[i]
+
+                    if node.op == HIERARCHY:
+                        aux = []
+                        for rule in hr_info["rules"]:
+                            if rule.rule.op == EQ:
+                                aux.append(rule)
+                            elif rule.rule.op == WHEN:
+                                if rule.rule.right.op == EQ:
+                                    aux.append(rule)
+                        # Filter only the rules with HRBinOP as =,
+                        # as they are the ones that will be computed
+                        if len(aux) == 0:
+                            raise SemanticError("1-1-10-5")
+                        hr_info["rules"] = aux
+
+                        hierarchy_ast = AST.HRuleset(
+                            name=hr_name,
+                            signature_type=hr_info["node"].signature_type,
+                            element=hr_info["node"].element,
+                            rules=aux,
+                        )
+                        HRDAGAnalyzer().visit(hierarchy_ast)
+
+                    Check_Hierarchy.validate_hr_dataset(dataset, component)
+
+                    # Gather rule data, adding the necessary elements to the interpreter
+                    # for simplicity
+                    self.ruleset_dataset = dataset
+                    self.ruleset_signature = {**{"RULE_COMPONENT": component}, **cond_info}
+                    self.ruleset_mode = mode
+                    self.hr_input = input_
+                    rule_output_values = {}
+                    if node.op == HIERARCHY:
+                        self.is_from_hr_agg = True
+                        self.hr_agg_rules_computed = {}
+                        for rule in hr_info["rules"]:
+                            self.visit(rule)
+                        self.is_from_hr_agg = False
+                    else:
+                        self.is_from_hr_val = True
+                        for rule in hr_info["rules"]:
+                            rule_output_values[rule.name] = {
+                                "errorcode": rule.erCode,
+                                "errorlevel": rule.erLevel,
+                                "output": self.visit(rule),
+                            }
+                        self.is_from_hr_val = False
+                    self.ruleset_signature = None
+                    self.ruleset_dataset = None
+                    self.ruleset_mode = None
+                    self.hr_input = None
+
+                # Final evaluation
+                if node.op == CHECK_HIERARCHY:
+                    result = Check_Hierarchy.analyze(
+                        dataset_element=dataset, rule_info=rule_output_values, output=output
+                    )
+                    del rule_output_values
+                else:
+                    result = Hierarchy.analyze(dataset, self.hr_agg_rules_computed, output)
+                    self.hr_agg_rules_computed = None
+                return result

-            raise SemanticError("1-3-5", op_type=
+            raise SemanticError("1-3-5", op_type="ParamOp", node_op=node.op)

     def visit_DPRule(self, node: AST.DPRule) -> None:
         self.is_from_rule = True
-        if self.ruleset_dataset
-        self.
-
-
+        if self.ruleset_dataset is not None:
+            if self.ruleset_dataset.data is None:
+                self.rule_data = None
+            else:
+                self.rule_data = self.ruleset_dataset.data.copy()
         validation_data = self.visit(node.rule)
         if isinstance(validation_data, DataComponent):
-            if self.rule_data is not None:
+            if self.rule_data is not None and self.ruleset_dataset is not None:
                 aux = self.rule_data.loc[:, self.ruleset_dataset.get_components_names()]
-                aux[
+                aux["bool_var"] = validation_data.data
                 validation_data = aux
             else:
                 validation_data = None
         if self.ruleset_mode == "invalid" and validation_data is not None:
-            validation_data = validation_data[validation_data[
+            validation_data = validation_data[validation_data["bool_var"] == False]
         self.rule_data = None
         self.is_from_rule = False
         return validation_data

     def visit_HRule(self, node: AST.HRule) -> None:
|
|
1103
1320
|
self.is_from_rule = True
|
|
1104
|
-
if self.ruleset_dataset
|
|
1105
|
-
self.rule_data =
|
|
1106
|
-
|
|
1107
|
-
|
|
1321
|
+
if self.ruleset_dataset is not None:
|
|
1322
|
+
self.rule_data = (
|
|
1323
|
+
None if self.ruleset_dataset.data is None else self.ruleset_dataset.data.copy()
|
|
1324
|
+
)
|
|
1108
1325
|
rule_result = self.visit(node.rule)
|
|
1109
1326
|
if rule_result is None:
|
|
1110
1327
|
self.is_from_rule = False
|
|
1111
1328
|
return None
|
|
1112
1329
|
if self.is_from_hr_agg:
|
|
1113
1330
|
measure_name = rule_result.get_measures_names()[0]
|
|
1114
|
-
if
|
|
1115
|
-
|
|
1116
|
-
|
|
1331
|
+
if (
|
|
1332
|
+
self.hr_agg_rules_computed is not None
|
|
1333
|
+
and rule_result.data is not None
|
|
1334
|
+
and len(rule_result.data[measure_name]) > 0
|
|
1335
|
+
):
|
|
1336
|
+
self.hr_agg_rules_computed[rule_result.name] = rule_result.data
|
|
1117
1337
|
else:
|
|
1118
1338
|
rule_result = rule_result.data
|
|
1119
1339
|
self.rule_data = None
|
|
1120
1340
|
self.is_from_rule = False
|
|
1121
1341
|
return rule_result
|
|
1122
1342
|
|
|
1123
|
-
def visit_HRBinOp(self, node: AST.HRBinOp) ->
|
|
1343
|
+
def visit_HRBinOp(self, node: AST.HRBinOp) -> Any:
|
|
1124
1344
|
if node.op == WHEN:
|
|
1125
1345
|
filter_comp = self.visit(node.left)
|
|
1126
1346
|
if self.rule_data is None:
|
|
1127
1347
|
return None
|
|
1128
1348
|
filtering_indexes = list(filter_comp.data[filter_comp.data == True].index)
|
|
1349
|
+
nan_indexes = list(filter_comp.data[filter_comp.data.isnull()].index)
|
|
1129
1350
|
# If no filtering indexes, then all datapoints are valid on DPR and HR
|
|
1130
1351
|
if len(filtering_indexes) == 0 and not (self.is_from_hr_agg or self.is_from_hr_val):
|
|
1131
|
-
self.rule_data[
|
|
1352
|
+
self.rule_data["bool_var"] = True
|
|
1353
|
+
self.rule_data.loc[nan_indexes, "bool_var"] = None
|
|
1132
1354
|
return self.rule_data
|
|
1133
1355
|
non_filtering_indexes = list(set(filter_comp.data.index) - set(filtering_indexes))
|
|
1134
1356
|
|
|
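
The main behavioral change in the block above is the new nan_indexes bookkeeping: datapoints whose when-condition evaluates to null are no longer silently marked valid. A minimal standalone sketch of that marking logic, with hypothetical data and pandas only:

import pandas as pd

# Hypothetical when-condition results: no row is strictly True, one is null.
rule_data = pd.DataFrame({"Id_1": [1, 2, 3]})
filter_comp = pd.Series([False, None, False])

filtering_indexes = list(filter_comp[filter_comp == True].index)
nan_indexes = list(filter_comp[filter_comp.isnull()].index)

if len(filtering_indexes) == 0:
    # No row passes the when-clause: all datapoints are valid by default...
    rule_data["bool_var"] = True
    # ...except those where the condition itself was null (new in 1.0.2).
    rule_data.loc[nan_indexes, "bool_var"] = None

print(rule_data["bool_var"].tolist())  # [True, None, True]
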
@@ -1138,14 +1360,16 @@ class InterpreterAnalyzer(ASTTemplate):
             if self.is_from_hr_agg or self.is_from_hr_val:
                 # We only need to filter rule_data on DPR
                 return result_validation
-            self.rule_data[
-            original_data = original_data.merge(
-
-
+            self.rule_data["bool_var"] = result_validation.data
+            original_data = original_data.merge(
+                self.rule_data, how="left", on=original_data.columns.tolist()
+            )
+            original_data.loc[non_filtering_indexes, "bool_var"] = True
+            original_data.loc[nan_indexes, "bool_var"] = None
             return original_data
         elif node.op in HR_COMP_MAPPING:
             self.is_from_assignment = True
-            if self.ruleset_mode in (
+            if self.ruleset_mode in ("partial_null", "partial_zero"):
                 self.hr_partial_is_valid = []
             left_operand = self.visit(node.left)
             self.is_from_assignment = False
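
The rewritten merge above re-attaches the rule verdicts to the full set of datapoints: rows the when-clause filtered out default to valid, and null conditions stay null. A hedged sketch of the same pattern with hypothetical frames:

import pandas as pd

original_data = pd.DataFrame({"Id_1": [1, 2, 3, 4]})
rule_data = pd.DataFrame({"Id_1": [2, 4], "bool_var": [True, False]})

# Left merge: rows not evaluated by the rule come back with a missing verdict.
merged = original_data.merge(rule_data, how="left", on=original_data.columns.tolist())

non_filtering_indexes = [0]  # hypothetical: when-clause was False here
nan_indexes = [2]            # hypothetical: when-clause was null here
merged.loc[non_filtering_indexes, "bool_var"] = True
merged.loc[nan_indexes, "bool_var"] = None

print(merged["bool_var"].tolist())  # [True, True, None, False]
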
@@ -1153,7 +1377,7 @@ class InterpreterAnalyzer(ASTTemplate):
             if isinstance(right_operand, Dataset):
                 right_operand = get_measure_from_dataset(right_operand, node.right.value)
 
-            if self.ruleset_mode in (
+            if self.ruleset_mode in ("partial_null", "partial_zero"):
                 # Check all values were present in the dataset
                 if self.hr_partial_is_valid and not any(self.hr_partial_is_valid):
                     right_operand.data = right_operand.data.map(lambda x: "REMOVE_VALUE")
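
REMOVE_VALUE acts as a sentinel: when a partial_* mode finds that none of the rule's code items were actually present, the whole rule result is flagged for later removal. A toy illustration of that guard, with names taken from the hunk and hypothetical data:

# One validity flag is collected per visited code item.
hr_partial_is_valid = [False, False, False]

data = ["A", "B", "C"]
if hr_partial_is_valid and not any(hr_partial_is_valid):
    # No code item was present in the dataset: mark everything for removal.
    data = ["REMOVE_VALUE" for _ in data]

print(data)  # ['REMOVE_VALUE', 'REMOVE_VALUE', 'REMOVE_VALUE']
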
@@ -1162,8 +1386,9 @@ class InterpreterAnalyzer(ASTTemplate):
             if self.is_from_hr_agg:
                 return HAAssignment.analyze(left_operand, right_operand, self.ruleset_mode)
             else:
-                result = HR_COMP_MAPPING[node.op].analyze(
-
+                result = HR_COMP_MAPPING[node.op].analyze(
+                    left_operand, right_operand, self.ruleset_mode
+                )
                 left_measure = left_operand.get_measures()[0]
                 if left_operand.data is None:
                     result.data = None
@@ -1175,14 +1400,23 @@ class InterpreterAnalyzer(ASTTemplate):
         else:
             left_operand = self.visit(node.left)
             right_operand = self.visit(node.right)
-            if
-
-
+            if (
+                isinstance(left_operand, Dataset)
+                and isinstance(right_operand, Dataset)
+                and self.ruleset_mode in ("partial_null", "partial_zero")
+                and not self.only_semantic
+            ):
                 measure_name = left_operand.get_measures_names()[0]
+                if left_operand.data is None:
+                    left_operand.data = pd.DataFrame({measure_name: []})
+                if right_operand.data is None:
+                    right_operand.data = pd.DataFrame({measure_name: []})
                 left_null_indexes = set(
-                    list(left_operand.data[left_operand.data[measure_name].isnull()].index)
+                    list(left_operand.data[left_operand.data[measure_name].isnull()].index)
+                )
                 right_null_indexes = set(
-                    list(right_operand.data[right_operand.data[measure_name].isnull()].index)
+                    list(right_operand.data[right_operand.data[measure_name].isnull()].index)
+                )
                 # If no indexes are in common, then one datapoint is not null
                 invalid_indexes = list(left_null_indexes.intersection(right_null_indexes))
                 if len(invalid_indexes) > 0:
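
For the partial modes, a datapoint is invalid only when the measure is null on both sides of the comparison; a null on a single side is tolerated. The set intersection above implements exactly that, as this self-contained sketch with hypothetical operands shows:

import pandas as pd

left = pd.DataFrame({"Me_1": [1.0, None, None]})
right = pd.DataFrame({"Me_1": [None, 2.0, None]})

left_null_indexes = set(left[left["Me_1"].isnull()].index)
right_null_indexes = set(right[right["Me_1"].isnull()].index)

# Only row 2 is null on both sides, so only row 2 is invalid.
invalid_indexes = list(left_null_indexes.intersection(right_null_indexes))
print(invalid_indexes)  # [2]
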
@@ -1209,11 +1443,13 @@ class InterpreterAnalyzer(ASTTemplate):
         if not isinstance(imbalance_element, Dataset):
             raise ValueError(f"Expected dataset, got {type(validation_element).__name__}")
 
-        return Check.analyze(
-
-
-
-
+        return Check.analyze(
+            validation_element=validation_element,
+            imbalance_element=imbalance_element,
+            error_code=node.error_code,
+            error_level=node.error_level,
+            invalid=node.invalid,
+        )
 
     def visit_EvalOp(self, node: AST.EvalOp) -> Dataset:
         """
@@ -1234,22 +1470,28 @@ class InterpreterAnalyzer(ASTTemplate):
             raise SemanticError("2-3-10", comp_type="External Routines")
 
         if node.name not in self.external_routines:
-            raise SemanticError("1-3-5", op_type=
+            raise SemanticError("1-3-5", op_type="External Routine", node_op=node.name)
         external_routine = self.external_routines[node.name]
         operands = {}
         for operand in node.operands:
-            element =
+            element = self.visit(operand)
             if not isinstance(element, Dataset):
                 raise ValueError(f"Expected dataset, got {type(element).__name__} as Eval Operand")
             operands[element.name.split(".")[1] if "." in element.name else element.name] = element
         output_to_check = node.output
         return Eval.analyze(operands, external_routine, output_to_check)
 
-    def generate_then_else_datasets(self, condition):
+    def generate_then_else_datasets(self, condition: Union[Dataset, DataComponent]) -> None:
         components = {}
+        if self.then_condition_dataset is None:
+            self.then_condition_dataset = []
+        if self.else_condition_dataset is None:
+            self.else_condition_dataset = []
         if isinstance(condition, Dataset):
-            if
-
+            if (
+                len(condition.get_measures()) != 1
+                or condition.get_measures()[0].data_type != BASIC_TYPES[bool]
+            ):
                 raise ValueError("Only one boolean measure is allowed on condition dataset")
             name = condition.get_measures_names()[0]
             if condition.data is None or condition.data.empty:
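
One detail worth noting from visit_EvalOp above: each operand is registered under the part of its name after a dot, so qualified names resolve to the bare dataset name. The rule in isolation, with a hypothetical qualified name:

# Mirrors the key expression used when collecting Eval operands above.
def operand_key(name: str) -> str:
    return name.split(".")[1] if "." in name else name

print(operand_key("DS_1"))        # DS_1
print(operand_key("alias.DS_1"))  # DS_1
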
@@ -1268,9 +1510,12 @@ class InterpreterAnalyzer(ASTTemplate):
             data = condition.data
 
         if data is not None:
-            if self.
-            merge_df =
-
+            if self.nested_condition and self.condition_stack is not None:
+                merge_df = (
+                    self.then_condition_dataset[-1]
+                    if self.condition_stack[-1] == THEN_ELSE["then"]
+                    else self.else_condition_dataset[-1]
+                )
                 indexes = merge_df.data[merge_df.data.columns[-1]]
             else:
                 indexes = data.index
@@ -1278,13 +1523,21 @@ class InterpreterAnalyzer(ASTTemplate):
 
         if isinstance(condition, Dataset):
             filtered_data = data.iloc[indexes]
-            then_data =
-
+            then_data: Any = (
+                condition.data[condition.data[name] == True]
+                if (condition.data is not None)
+                else []
+            )
+            then_indexes: Any = list(filtered_data[filtered_data == True].index)
             if len(then_data) > len(then_indexes):
                 then_data = then_data.iloc[then_indexes]
             then_data[name] = then_indexes
-            else_data =
-
+            else_data: Any = (
+                condition.data[condition.data[name] != True]
+                if (condition.data is not None)
+                else []
+            )
+            else_indexes: Any = list(set(indexes) - set(then_indexes))
             if len(else_data) > len(else_indexes):
                 else_data = else_data.iloc[else_indexes]
             else_data[name] = else_indexes
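
The split above decides branch membership strictly: only rows whose condition is True go to the then side, while False and null both fall to else. A compact pandas sketch with a hypothetical condition column:

import pandas as pd

name = "bool_var"
data = pd.DataFrame({name: [True, False, None, True]})

then_data = data[data[name] == True]
else_data = data[data[name] != True]

print(list(then_data.index))  # [0, 3]
print(list(else_data.index))  # [1, 2]  (null counts as the else branch)
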
@@ -1298,28 +1551,44 @@ class InterpreterAnalyzer(ASTTemplate):
             then_data = pd.DataFrame({name: []})
             else_data = pd.DataFrame({name: []})
         components.update(
-            {
-
+            {
+                name: Component(
+                    name=name, data_type=BASIC_TYPES[int], role=Role.MEASURE, nullable=True
+                )
+            }
+        )
         then_dataset = Dataset(name=name, components=components, data=then_data)
         else_dataset = Dataset(name=name, components=components, data=else_data)
         self.then_condition_dataset.append(then_dataset)
         self.else_condition_dataset.append(else_dataset)
 
-    def merge_then_else_datasets(self, left_operand:
-
-
-        self.else_condition_dataset
+    def merge_then_else_datasets(self, left_operand: Any, right_operand: Any) -> Any:
+        if (
+            self.then_condition_dataset is None
+            or self.else_condition_dataset is None
+            or self.condition_stack is None
+        ):
+            return left_operand, right_operand
+        merge_dataset = (
+            self.then_condition_dataset.pop()
+            if self.condition_stack.pop() == THEN_ELSE["then"]
+            else (self.else_condition_dataset.pop())
+        )
         merge_index = merge_dataset.data[merge_dataset.get_measures_names()[0]].to_list()
         ids = merge_dataset.get_identifiers_names()
         if isinstance(left_operand, Dataset | DataComponent):
             if left_operand.data is None:
                 return left_operand, right_operand
             if isinstance(left_operand, Dataset):
-                dataset_index = left_operand.data.index[
-
+                dataset_index = left_operand.data.index[
+                    left_operand.data[ids]
+                    .apply(tuple, 1)
+                    .isin(merge_dataset.data[ids].apply(tuple, 1))
+                ]
                 left = left_operand.data[left_operand.get_measures_names()[0]]
                 left_operand.data[left_operand.get_measures_names()[0]] = left.reindex(
-                    dataset_index, fill_value=None
+                    dataset_index, fill_value=None
+                )
             else:
                 left = left_operand.data
                 left_operand.data = left.reindex(merge_index, fill_value=None)
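
merge_then_else_datasets aligns a branch result with the rows its condition selected by comparing identifier tuples, then blanks everything else via reindex. The core trick, reduced to a standalone example with hypothetical columns:

import pandas as pd

operand = pd.DataFrame({"Id_1": [1, 2, 3], "Me_1": [10, 20, 30]})
branch_rows = pd.DataFrame({"Id_1": [1, 3]})  # rows selected by the condition
ids = ["Id_1"]

# Row-wise tuple comparison over the identifier columns, as in the hunk.
mask = operand[ids].apply(tuple, 1).isin(branch_rows[ids].apply(tuple, 1))
dataset_index = operand.index[mask]

# Measures outside the branch become null after the reindex.
operand["Me_1"] = operand["Me_1"].reindex(dataset_index, fill_value=None)
print(operand["Me_1"].tolist())  # [10.0, nan, 30.0]
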
@@ -1328,17 +1597,20 @@ class InterpreterAnalyzer(ASTTemplate):
                 return left_operand, right_operand
             if isinstance(right_operand, Dataset):
                 dataset_index = right_operand.data.index[
-                    right_operand.data[ids]
-
+                    right_operand.data[ids]
+                    .apply(tuple, 1)
+                    .isin(merge_dataset.data[ids].apply(tuple, 1))
+                ]
                 right = right_operand.data[right_operand.get_measures_names()[0]]
                 right_operand.data[right_operand.get_measures_names()[0]] = right.reindex(
-                    dataset_index, fill_value=None
+                    dataset_index, fill_value=None
+                )
             else:
                 right = right_operand.data
                 right_operand.data = right.reindex(merge_index, fill_value=None)
         return left_operand, right_operand
 
-    def visit_Identifier(self, node: AST.Identifier) -> AST.AST:
+    def visit_Identifier(self, node: AST.Identifier) -> Union[AST.AST, Dataset, str]:
         """
         Identifier: (value)
 
@@ -1356,7 +1628,7 @@ class InterpreterAnalyzer(ASTTemplate):
             return self.datasets[node.value]
         return node.value
 
-    def visit_DefIdentifier(self, node: AST.DefIdentifier) ->
+    def visit_DefIdentifier(self, node: AST.DefIdentifier) -> Any:
         """
         DefIdentifier: (value, kind)
 
@@ -1366,51 +1638,63 @@ class InterpreterAnalyzer(ASTTemplate):
         """
         partial_is_valid = True
         # Only for Hierarchical Rulesets
-        if not (self.is_from_rule and node.kind ==
+        if not (self.is_from_rule and node.kind == "CodeItemID"):
            return node.value
 
         # Getting Dataset elements
-        result_components = {
-
-
-
+        result_components = {
+            comp_name: copy(comp)
+            for comp_name, comp in
+            self.ruleset_dataset.components.items()  # type: ignore[union-attr]
+        }
+        if self.ruleset_signature is not None:
+            hr_component = self.ruleset_signature["RULE_COMPONENT"]
         name = node.value
 
         if self.rule_data is None:
             return Dataset(name=name, components=result_components, data=None)
 
         condition = None
-        if hasattr(node,
-        condition: DataComponent = self.visit(node._right_condition)
-        condition
-
-
+        if hasattr(node, "_right_condition"):
+            condition: DataComponent = self.visit(node._right_condition)  # type: ignore[no-redef]
+            if condition is not None:
+                condition = condition.data[condition.data == True].index
+
+        if (
+            self.hr_agg_rules_computed is not None
+            and self.hr_input == "rule"
+            and node.value in self.hr_agg_rules_computed
+        ):
             df = self.hr_agg_rules_computed[node.value].copy()
             return Dataset(name=name, components=result_components, data=df)
 
         df = self.rule_data.copy()
         if condition is not None:
             df = df.loc[condition].reset_index(drop=True)
-
+
+        measure_name = self.ruleset_dataset.get_measures_names()[0]  # type: ignore[union-attr]
         if node.value in df[hr_component].values:
-            rest_identifiers = [
-
+            rest_identifiers = [
+                comp.name
+                for comp in result_components.values()
+                if comp.role == Role.IDENTIFIER and comp.name != hr_component
+            ]
             code_data = df[df[hr_component] == node.value].reset_index(drop=True)
-            code_data = code_data.merge(df[rest_identifiers], how=
+            code_data = code_data.merge(df[rest_identifiers], how="right", on=rest_identifiers)
             code_data = code_data.drop_duplicates().reset_index(drop=True)
 
             # If the value is in the dataset, we create a new row
             # based on the hierarchy mode
             # (Missing data points are considered,
             # lines 6483-6510 of the reference manual)
-            if self.ruleset_mode in (
+            if self.ruleset_mode in ("partial_null", "partial_zero"):
                 # We do not care about the presence of the leftCodeItem in Hierarchy Roll-up
                 if self.is_from_hr_agg and self.is_from_assignment:
                     pass
                 elif code_data[hr_component].isnull().any():
                     partial_is_valid = False
 
-            if self.ruleset_mode in (
+            if self.ruleset_mode in ("non_zero", "partial_zero", "always_zero"):
                 fill_indexes = code_data[code_data[hr_component].isnull()].index
                 code_data.loc[fill_indexes, measure_name] = 0
             code_data[hr_component] = node.value
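
The two mode checks above encode the hierarchy semantics for absent datapoints: the *_zero family fills a missing code item with 0, while the *_null family leaves it null. The fill logic in isolation, with hypothetical data:

import pandas as pd

code_data = pd.DataFrame({"code": ["A", None], "Me_1": [5.0, None]})
ruleset_mode = "partial_zero"  # hypothetical; one of the six VTL hierarchy modes

if ruleset_mode in ("non_zero", "partial_zero", "always_zero"):
    fill_indexes = code_data[code_data["code"].isnull()].index
    code_data.loc[fill_indexes, "Me_1"] = 0
code_data["code"] = "A"

print(code_data["Me_1"].tolist())  # [5.0, 0.0]
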
@@ -1420,86 +1704,116 @@ class InterpreterAnalyzer(ASTTemplate):
             # based on the hierarchy mode
             # (Missing data points are considered,
             # lines 6483-6510 of the reference manual)
-            if self.ruleset_mode in (
+            if self.ruleset_mode in ("partial_null", "partial_zero"):
                 # We do not care about the presence of the leftCodeItem in Hierarchy Roll-up
                 if self.is_from_hr_agg and self.is_from_assignment:
                     pass
-                elif self.ruleset_mode ==
+                elif self.ruleset_mode == "partial_null":
                     partial_is_valid = False
             df = df.head(1)
             df[hr_component] = node.value
-            if self.ruleset_mode in (
+            if self.ruleset_mode in ("non_zero", "partial_zero", "always_zero"):
                 df[measure_name] = 0
             else:  # For non_null, partial_null and always_null
                 df[measure_name] = None
-        if self.ruleset_mode in (
+        if self.hr_partial_is_valid is not None and self.ruleset_mode in (
+            "partial_null",
+            "partial_zero",
+        ):
             self.hr_partial_is_valid.append(partial_is_valid)
         return Dataset(name=name, components=result_components, data=df)
 
-    def visit_UDOCall(self, node: AST.UDOCall) -> None:
+    def visit_UDOCall(self, node: AST.UDOCall) -> None:  # noqa: C901
         if self.udos is None:
             raise SemanticError("2-3-10", comp_type="User Defined Operators")
         elif node.op not in self.udos:
-            raise SemanticError("1-3-5", node_op=node.op, op_type=
-
-        signature_values = {}
+            raise SemanticError("1-3-5", node_op=node.op, op_type="User Defined Operator")
 
         operator = self.udos[node.op]
+        signature_values = {}
 
-        if operator
-
+        if operator is None:
+            raise SemanticError("1-3-5", node_op=node.op, op_type="User Defined Operator")
+        if operator["output"] == "Component" and not (
+            self.is_from_regular_aggregation or self.is_from_rule
+        ):
             raise SemanticError("1-3-29", op=node.op)
 
-        for i, param in enumerate(operator[
+        for i, param in enumerate(operator["params"]):
             if i >= len(node.params):
-                if
-                value = self.visit(param[
-                signature_values[param[
-
+                if "default" in param:
+                    value = self.visit(param["default"]).value
+                    signature_values[param["name"]] = Scalar(
+                        name=str(value), value=value, data_type=BASIC_TYPES[type(value)]
+                    )
                 else:
-                    raise SemanticError(
-
+                    raise SemanticError(
+                        "1-3-28",
+                        op=node.op,
+                        received=len(node.params),
+                        expected=len(operator["params"]),
+                    )
             else:
-                if isinstance(param[
-                if param[
-                signature_values[param[
-                elif param[
+                if isinstance(param["type"], str):  # Scalar, Dataset, Component
+                    if param["type"] == "Scalar":
+                        signature_values[param["name"]] = self.visit(node.params[i])
+                    elif param["type"] in ["Dataset", "Component"]:
                         if isinstance(node.params[i], AST.VarID):
-                            signature_values[param[
+                            signature_values[param["name"]] = (
+                                node.params[i].value)  # type: ignore[attr-defined]
                         else:
                             param_element = self.visit(node.params[i])
                             if isinstance(param_element, Dataset):
-                                if param[
-                                raise SemanticError(
-
-
-
-
-
-
-
-
-
+                                if param["type"] == "Component":
+                                    raise SemanticError(
+                                        "1-4-1-1",
+                                        op=node.op,
+                                        option=param["name"],
+                                        type_1=param["type"],
+                                        type_2="Dataset",
+                                    )
+                            elif isinstance(param_element, Scalar) and param["type"] in [
+                                "Dataset",
+                                "Component",
+                            ]:
+                                raise SemanticError(
+                                    "1-4-1-1",
+                                    op=node.op,
+                                    option=param["name"],
+                                    type_1=param["type"],
+                                    type_2="Scalar",
+                                )
+                            signature_values[param["name"]] = param_element
 
                 else:
                     raise NotImplementedError
-            elif issubclass(param[
+                elif issubclass(param["type"], ScalarType):  # Basic types
                     # For basic Scalar types (Integer, Float, String, Boolean)
                     # We validate the type is correct and cast the value
                     param_element = self.visit(node.params[i])
                     if isinstance(param_element, (Dataset, DataComponent)):
-                        type_2 =
-                        raise SemanticError(
-
-
-
+                        type_2 = "Dataset" if isinstance(param_element, Dataset) else "Component"
+                        raise SemanticError(
+                            "1-4-1-1",
+                            op=node.op,
+                            option=param["name"],
+                            type_1=param["type"],
+                            type_2=type_2,
+                        )
+                    scalar_type = param["type"]
                     if not check_unary_implicit_promotion(param_element.data_type, scalar_type):
-                        raise SemanticError(
-
-
-
-
-
-
+                        raise SemanticError(
+                            "2-3-5",
+                            param_type=scalar_type,
+                            type_name=param_element.data_type,
+                            op=node.op,
+                            param_name=param["name"],
+                        )
+                    signature_values[param["name"]] = Scalar(
+                        name=param_element.name,
+                        value=scalar_type.cast(param_element.value),
+                        data_type=scalar_type,
+                    )
                 else:
                     raise NotImplementedError
 
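
The parameter loop above binds UDO arguments positionally, falls back to declared defaults, and raises "1-3-28" when a defaultless parameter is left unbound. A simplified, framework-free sketch of that policy (the helper name is hypothetical):

# Hypothetical stand-in for the binding loop in visit_UDOCall.
def bind_udo_params(params, args):
    bound = {}
    for i, param in enumerate(params):
        if i < len(args):
            bound[param["name"]] = args[i]
        elif "default" in param:
            bound[param["name"]] = param["default"]
        else:
            raise ValueError(f"missing argument for {param['name']!r}")
    return bound

print(bind_udo_params([{"name": "x"}, {"name": "y", "default": 0}], [1]))
# {'x': 1, 'y': 0}
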
@@ -1511,17 +1825,22 @@ class InterpreterAnalyzer(ASTTemplate):
         self.udo_params.append(signature_values)
 
         # Calling the UDO AST, we use deepcopy to avoid changing the original UDO AST
-
+        if operator is not None:
+            result = self.visit(deepcopy(operator["expression"]))
 
         if self.is_from_regular_aggregation or self.is_from_rule:
-            result_type =
+            result_type = "Component" if isinstance(result, DataComponent) else "Scalar"
         else:
-            result_type =
+            result_type = "Scalar" if isinstance(result, Scalar) else "Dataset"
 
-        if result_type != operator[
-        raise SemanticError(
-
-
+        if result_type != operator["output"]:
+            raise SemanticError(
+                "1-4-1-1",
+                op=node.op,
+                option="output",
+                type_1=operator["output"],
+                type_2=result_type,
+            )
 
         # We pop the last element of the stack (current UDO params)
         # to avoid using them in the next UDO call
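
After evaluating the UDO expression, the declared output class is enforced against what actually came back. The shape of that check, with plain Python types standing in for Scalar and Dataset (hypothetical helper, for illustration only):

# Hypothetical sketch: declared output vs. the evaluated result's kind.
def check_udo_output(declared: str, result) -> None:
    result_type = "Scalar" if isinstance(result, (int, float, str)) else "Dataset"
    if result_type != declared:
        raise TypeError(f"UDO output mismatch: expected {declared}, got {result_type}")

check_udo_output("Scalar", 42)  # passes silently
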
@@ -1535,5 +1854,6 @@ class InterpreterAnalyzer(ASTTemplate):
     def visit_TimeAggregation(self, node: AST.TimeAggregation) -> None:
         operand = self.visit(node.operand)
 
-        return Time_Aggregation.analyze(
-
+        return Time_Aggregation.analyze(
+            operand=operand, period_from=node.period_from, period_to=node.period_to, conf=node.conf
+        )