vtlengine-1.4.0rc2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vtlengine/API/_InternalApi.py +791 -0
- vtlengine/API/__init__.py +612 -0
- vtlengine/API/data/schema/external_routines_schema.json +34 -0
- vtlengine/API/data/schema/json_schema_2.1.json +116 -0
- vtlengine/API/data/schema/value_domain_schema.json +97 -0
- vtlengine/AST/ASTComment.py +57 -0
- vtlengine/AST/ASTConstructor.py +598 -0
- vtlengine/AST/ASTConstructorModules/Expr.py +1928 -0
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +995 -0
- vtlengine/AST/ASTConstructorModules/Terminals.py +790 -0
- vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- vtlengine/AST/ASTDataExchange.py +10 -0
- vtlengine/AST/ASTEncoders.py +32 -0
- vtlengine/AST/ASTString.py +675 -0
- vtlengine/AST/ASTTemplate.py +558 -0
- vtlengine/AST/ASTVisitor.py +25 -0
- vtlengine/AST/DAG/__init__.py +479 -0
- vtlengine/AST/DAG/_words.py +10 -0
- vtlengine/AST/Grammar/Vtl.g4 +705 -0
- vtlengine/AST/Grammar/VtlTokens.g4 +409 -0
- vtlengine/AST/Grammar/__init__.py +0 -0
- vtlengine/AST/Grammar/lexer.py +2139 -0
- vtlengine/AST/Grammar/parser.py +16597 -0
- vtlengine/AST/Grammar/tokens.py +169 -0
- vtlengine/AST/VtlVisitor.py +824 -0
- vtlengine/AST/__init__.py +674 -0
- vtlengine/DataTypes/TimeHandling.py +562 -0
- vtlengine/DataTypes/__init__.py +863 -0
- vtlengine/DataTypes/_time_checking.py +135 -0
- vtlengine/Exceptions/__exception_file_generator.py +96 -0
- vtlengine/Exceptions/__init__.py +159 -0
- vtlengine/Exceptions/messages.py +1004 -0
- vtlengine/Interpreter/__init__.py +2048 -0
- vtlengine/Model/__init__.py +501 -0
- vtlengine/Operators/Aggregation.py +357 -0
- vtlengine/Operators/Analytic.py +455 -0
- vtlengine/Operators/Assignment.py +23 -0
- vtlengine/Operators/Boolean.py +106 -0
- vtlengine/Operators/CastOperator.py +451 -0
- vtlengine/Operators/Clause.py +366 -0
- vtlengine/Operators/Comparison.py +488 -0
- vtlengine/Operators/Conditional.py +495 -0
- vtlengine/Operators/General.py +191 -0
- vtlengine/Operators/HROperators.py +254 -0
- vtlengine/Operators/Join.py +447 -0
- vtlengine/Operators/Numeric.py +422 -0
- vtlengine/Operators/RoleSetter.py +77 -0
- vtlengine/Operators/Set.py +176 -0
- vtlengine/Operators/String.py +578 -0
- vtlengine/Operators/Time.py +1144 -0
- vtlengine/Operators/Validation.py +275 -0
- vtlengine/Operators/__init__.py +900 -0
- vtlengine/Utils/__Virtual_Assets.py +34 -0
- vtlengine/Utils/__init__.py +479 -0
- vtlengine/__extras_check.py +17 -0
- vtlengine/__init__.py +27 -0
- vtlengine/files/__init__.py +0 -0
- vtlengine/files/output/__init__.py +35 -0
- vtlengine/files/output/_time_period_representation.py +55 -0
- vtlengine/files/parser/__init__.py +240 -0
- vtlengine/files/parser/_rfc_dialect.py +22 -0
- vtlengine/py.typed +0 -0
- vtlengine-1.4.0rc2.dist-info/METADATA +89 -0
- vtlengine-1.4.0rc2.dist-info/RECORD +66 -0
- vtlengine-1.4.0rc2.dist-info/WHEEL +4 -0
- vtlengine-1.4.0rc2.dist-info/licenses/LICENSE.md +661 -0
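
The largest addition is the interpreter itself (`vtlengine/Interpreter/__init__.py`, +2048 lines), whose opening ~1,330 lines are reproduced below. For orientation, here is a minimal, hypothetical sketch of driving the `InterpreterAnalyzer` defined there, using the model classes this release also ships (`Dataset`, `Component`, `Role` from `vtlengine.Model`); the scalar type names follow the diff's own comments, and the final `visit` call is left commented out because building the `Start` AST node (via the parser modules under `vtlengine/AST/`) is outside this excerpt:

```python
import pandas as pd

from vtlengine.DataTypes import Number, String  # basic type classes named in the diff's comments
from vtlengine.Interpreter import InterpreterAnalyzer
from vtlengine.Model import Component, Dataset, Role

# Hypothetical input dataset: one identifier and one measure.
components = {
    "Id_1": Component(name="Id_1", data_type=String, role=Role.IDENTIFIER, nullable=False),
    "Me_1": Component(name="Me_1", data_type=Number, role=Role.MEASURE, nullable=True),
}
ds = Dataset(
    name="DS_1",
    components=components,
    data=pd.DataFrame({"Id_1": ["A", "B"], "Me_1": [1.0, 2.0]}),
)

# only_semantic=True runs structural checks without computing datapoints.
interpreter = InterpreterAnalyzer(datasets={"DS_1": ds}, only_semantic=True)
# The Start node would come from the package's parser (see vtlengine/AST/ASTConstructor.py):
# results = interpreter.visit(ast_start_node)
```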
|
@@ -0,0 +1,2048 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
from copy import copy, deepcopy
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Dict, List, Optional, Type, Union
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
import vtlengine.AST as AST
|
|
10
|
+
import vtlengine.Exceptions
|
|
11
|
+
import vtlengine.Operators as Operators
|
|
12
|
+
from vtlengine.AST import VarID
|
|
13
|
+
from vtlengine.AST.ASTTemplate import ASTTemplate
|
|
14
|
+
from vtlengine.AST.DAG import HRDAGAnalyzer
|
|
15
|
+
from vtlengine.AST.DAG._words import DELETE, GLOBAL, INSERT, PERSISTENT
|
|
16
|
+
from vtlengine.AST.Grammar.tokens import (
|
|
17
|
+
AGGREGATE,
|
|
18
|
+
ALL,
|
|
19
|
+
APPLY,
|
|
20
|
+
AS,
|
|
21
|
+
BETWEEN,
|
|
22
|
+
CALC,
|
|
23
|
+
CAST,
|
|
24
|
+
CHECK_DATAPOINT,
|
|
25
|
+
CHECK_HIERARCHY,
|
|
26
|
+
COUNT,
|
|
27
|
+
CURRENT_DATE,
|
|
28
|
+
DATE_ADD,
|
|
29
|
+
DROP,
|
|
30
|
+
EQ,
|
|
31
|
+
EXISTS_IN,
|
|
32
|
+
EXTERNAL,
|
|
33
|
+
FILL_TIME_SERIES,
|
|
34
|
+
FILTER,
|
|
35
|
+
HAVING,
|
|
36
|
+
HIERARCHY,
|
|
37
|
+
INSTR,
|
|
38
|
+
KEEP,
|
|
39
|
+
MEMBERSHIP,
|
|
40
|
+
REPLACE,
|
|
41
|
+
ROUND,
|
|
42
|
+
SUBSTR,
|
|
43
|
+
TRUNC,
|
|
44
|
+
WHEN,
|
|
45
|
+
)
|
|
46
|
+
from vtlengine.DataTypes import (
|
|
47
|
+
BASIC_TYPES,
|
|
48
|
+
SCALAR_TYPES_CLASS_REVERSE,
|
|
49
|
+
Boolean,
|
|
50
|
+
ScalarType,
|
|
51
|
+
check_unary_implicit_promotion,
|
|
52
|
+
)
|
|
53
|
+
from vtlengine.Exceptions import SemanticError
|
|
54
|
+
from vtlengine.files.output import save_datapoints
|
|
55
|
+
from vtlengine.files.output._time_period_representation import TimePeriodRepresentation
|
|
56
|
+
from vtlengine.files.parser import _fill_dataset_empty_data, load_datapoints
|
|
57
|
+
from vtlengine.Model import (
|
|
58
|
+
Component,
|
|
59
|
+
DataComponent,
|
|
60
|
+
Dataset,
|
|
61
|
+
ExternalRoutine,
|
|
62
|
+
Role,
|
|
63
|
+
Scalar,
|
|
64
|
+
ScalarSet,
|
|
65
|
+
ValueDomain,
|
|
66
|
+
)
|
|
67
|
+
from vtlengine.Operators.Aggregation import extract_grouping_identifiers
|
|
68
|
+
from vtlengine.Operators.Assignment import Assignment
|
|
69
|
+
from vtlengine.Operators.CastOperator import Cast
|
|
70
|
+
from vtlengine.Operators.Comparison import Between, ExistIn
|
|
71
|
+
from vtlengine.Operators.Conditional import Case, If
|
|
72
|
+
from vtlengine.Operators.General import Eval
|
|
73
|
+
from vtlengine.Operators.HROperators import (
|
|
74
|
+
HAAssignment,
|
|
75
|
+
Hierarchy,
|
|
76
|
+
get_measure_from_dataset,
|
|
77
|
+
)
|
|
78
|
+
from vtlengine.Operators.Numeric import Round, Trunc
|
|
79
|
+
from vtlengine.Operators.String import Instr, Replace, Substr
|
|
80
|
+
from vtlengine.Operators.Time import (
|
|
81
|
+
Current_Date,
|
|
82
|
+
Date_Add,
|
|
83
|
+
Fill_time_series,
|
|
84
|
+
Time_Aggregation,
|
|
85
|
+
)
|
|
86
|
+
from vtlengine.Operators.Validation import Check, Check_Datapoint, Check_Hierarchy
|
|
87
|
+
from vtlengine.Utils import (
|
|
88
|
+
AGGREGATION_MAPPING,
|
|
89
|
+
ANALYTIC_MAPPING,
|
|
90
|
+
BINARY_MAPPING,
|
|
91
|
+
HR_COMP_MAPPING,
|
|
92
|
+
HR_NUM_BINARY_MAPPING,
|
|
93
|
+
HR_UNARY_MAPPING,
|
|
94
|
+
JOIN_MAPPING,
|
|
95
|
+
REGULAR_AGGREGATION_MAPPING,
|
|
96
|
+
ROLE_SETTER_MAPPING,
|
|
97
|
+
SET_MAPPING,
|
|
98
|
+
THEN_ELSE,
|
|
99
|
+
UNARY_MAPPING,
|
|
100
|
+
)
|
|
101
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# noinspection PyTypeChecker
|
|
105
|
+
@dataclass
|
|
106
|
+
class InterpreterAnalyzer(ASTTemplate):
|
|
107
|
+
# Model elements
|
|
108
|
+
datasets: Dict[str, Dataset]
|
|
109
|
+
scalars: Optional[Dict[str, Scalar]] = None
|
|
110
|
+
value_domains: Optional[Dict[str, ValueDomain]] = None
|
|
111
|
+
external_routines: Optional[Dict[str, ExternalRoutine]] = None
|
|
112
|
+
# Analysis mode
|
|
113
|
+
only_semantic: bool = False
|
|
114
|
+
# Memory efficient
|
|
115
|
+
ds_analysis: Optional[Dict[str, Any]] = None
|
|
116
|
+
datapoints_paths: Optional[Dict[str, Path]] = None
|
|
117
|
+
output_path: Optional[Union[str, Path]] = None
|
|
118
|
+
# Time Period Representation
|
|
119
|
+
time_period_representation: Optional[TimePeriodRepresentation] = None
|
|
120
|
+
# Return only persistent
|
|
121
|
+
return_only_persistent: bool = True
|
|
122
|
+
# Flags to change behavior
|
|
123
|
+
nested_condition: Union[str, bool] = False
|
|
124
|
+
is_from_assignment: bool = False
|
|
125
|
+
is_from_component_assignment: bool = False
|
|
126
|
+
is_from_regular_aggregation: bool = False
|
|
127
|
+
is_from_grouping: bool = False
|
|
128
|
+
is_from_having: bool = False
|
|
129
|
+
is_from_if: bool = False
|
|
130
|
+
is_from_rule: bool = False
|
|
131
|
+
is_from_join: bool = False
|
|
132
|
+
is_from_condition: bool = False
|
|
133
|
+
is_from_hr_val: bool = False
|
|
134
|
+
is_from_hr_agg: bool = False
|
|
135
|
+
condition_stack: Optional[List[str]] = None
|
|
136
|
+
# Handlers for simplicity
|
|
137
|
+
regular_aggregation_dataset: Optional[Dataset] = None
|
|
138
|
+
aggregation_grouping: Optional[List[str]] = None
|
|
139
|
+
aggregation_dataset: Optional[Dataset] = None
|
|
140
|
+
then_condition_dataset: Optional[List[Any]] = None
|
|
141
|
+
else_condition_dataset: Optional[List[Any]] = None
|
|
142
|
+
ruleset_dataset: Optional[Dataset] = None
|
|
143
|
+
rule_data: Optional[pd.DataFrame] = None
|
|
144
|
+
ruleset_signature: Optional[Dict[str, str]] = None
|
|
145
|
+
udo_params: Optional[List[Dict[str, Any]]] = None
|
|
146
|
+
hr_agg_rules_computed: Optional[Dict[str, pd.DataFrame]] = None
|
|
147
|
+
ruleset_mode: Optional[str] = None
|
|
148
|
+
hr_input: Optional[str] = None
|
|
149
|
+
hr_partial_is_valid: Optional[List[bool]] = None
|
|
150
|
+
hr_condition: Optional[Dict[str, str]] = None
|
|
151
|
+
# DL
|
|
152
|
+
dprs: Optional[Dict[str, Optional[Dict[str, Any]]]] = None
|
|
153
|
+
udos: Optional[Dict[str, Optional[Dict[str, Any]]]] = None
|
|
154
|
+
hrs: Optional[Dict[str, Optional[Dict[str, Any]]]] = None
|
|
155
|
+
is_from_case_then: bool = False
|
|
156
|
+
signature_values: Optional[Dict[str, Any]] = None
|
|
157
|
+
|
|
158
|
+
def __post_init__(self) -> None:
|
|
159
|
+
self.datasets_inputs = set(self.datasets.keys())
|
|
160
|
+
self.scalars_inputs = set(self.scalars.keys()) if self.scalars else set()
|
|
161
|
+
|
|
162
|
+
# **********************************
|
|
163
|
+
# * *
|
|
164
|
+
# * Memory efficient *
|
|
165
|
+
# * *
|
|
166
|
+
# **********************************
|
|
167
|
+
def _load_datapoints_efficient(self, statement_num: int) -> None:
|
|
168
|
+
if self.datapoints_paths is None:
|
|
169
|
+
return
|
|
170
|
+
if self.ds_analysis is None:
|
|
171
|
+
return
|
|
172
|
+
if statement_num not in self.ds_analysis[INSERT]:
|
|
173
|
+
return
|
|
174
|
+
for ds_name in self.ds_analysis[INSERT][statement_num]:
|
|
175
|
+
if ds_name in self.datapoints_paths:
|
|
176
|
+
self.datasets[ds_name].data = load_datapoints(
|
|
177
|
+
self.datasets[ds_name].components,
|
|
178
|
+
ds_name,
|
|
179
|
+
self.datapoints_paths[ds_name],
|
|
180
|
+
)
|
|
181
|
+
elif ds_name in self.datasets and self.datasets[ds_name].data is None:
|
|
182
|
+
_fill_dataset_empty_data(self.datasets[ds_name])
|
|
183
|
+
|
|
184
|
+
def _save_datapoints_efficient(self, statement_num: int) -> None:
|
|
185
|
+
if self.output_path is None:
|
|
186
|
+
# Keeping the data in memory if no output path is provided
|
|
187
|
+
return
|
|
188
|
+
if self.ds_analysis is None:
|
|
189
|
+
return
|
|
190
|
+
if statement_num not in self.ds_analysis[DELETE]:
|
|
191
|
+
return
|
|
192
|
+
for ds_name in self.ds_analysis[DELETE][statement_num]:
|
|
193
|
+
if (
|
|
194
|
+
ds_name not in self.datasets
|
|
195
|
+
or not isinstance(self.datasets[ds_name], Dataset)
|
|
196
|
+
or self.datasets[ds_name].data is None
|
|
197
|
+
):
|
|
198
|
+
continue
|
|
199
|
+
if ds_name in self.ds_analysis[GLOBAL]:
|
|
200
|
+
# We do not save global input datasets, only results of transformations
|
|
201
|
+
self.datasets[ds_name].data = None
|
|
202
|
+
continue
|
|
203
|
+
if self.return_only_persistent and ds_name not in self.ds_analysis[PERSISTENT]:
|
|
204
|
+
self.datasets[ds_name].data = None
|
|
205
|
+
continue
|
|
206
|
+
# Saving only datasets, no scalars
|
|
207
|
+
save_datapoints(
|
|
208
|
+
self.time_period_representation,
|
|
209
|
+
self.datasets[ds_name],
|
|
210
|
+
self.output_path,
|
|
211
|
+
)
|
|
212
|
+
self.datasets[ds_name].data = None
|
|
213
|
+
|
|
214
|
+
def _save_scalars_efficient(self, scalars: Dict[str, Scalar]) -> None:
|
|
215
|
+
output_path = Path(self.output_path) # type: ignore[arg-type]
|
|
216
|
+
output_path.mkdir(parents=True, exist_ok=True)
|
|
217
|
+
result_scalars = dict(scalars)
|
|
218
|
+
if result_scalars:
|
|
219
|
+
sorted(result_scalars.keys())
|
|
220
|
+
file_path = output_path / "_scalars.csv"
|
|
221
|
+
with open(file_path, "w", newline="", encoding="utf-8") as csv_file:
|
|
222
|
+
writer = csv.writer(csv_file)
|
|
223
|
+
writer.writerow(["Name", "Value"])
|
|
224
|
+
for name, scalar in sorted(result_scalars.items(), key=lambda item: item[0]):
|
|
225
|
+
value_to_write = "" if scalar.value is None else scalar.value
|
|
226
|
+
writer.writerow([name, str(value_to_write)])
|
|
227
|
+
|
|
228
|
+
# **********************************
|
|
229
|
+
# * *
|
|
230
|
+
# * AST Visitors *
|
|
231
|
+
# * *
|
|
232
|
+
# **********************************
|
|
233
|
+
|
|
234
|
+
def visit_Start(self, node: AST.Start) -> Any:
|
|
235
|
+
statement_num = 1
|
|
236
|
+
if self.only_semantic:
|
|
237
|
+
Operators.only_semantic = True
|
|
238
|
+
else:
|
|
239
|
+
Operators.only_semantic = False
|
|
240
|
+
results = {}
|
|
241
|
+
scalars_to_save = set()
|
|
242
|
+
invalid_dataset_outputs = []
|
|
243
|
+
invalid_scalar_outputs = []
|
|
244
|
+
for child in node.children:
|
|
245
|
+
if isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
|
|
246
|
+
vtlengine.Exceptions.dataset_output = child.left.value # type: ignore[attr-defined]
|
|
247
|
+
self._load_datapoints_efficient(statement_num)
|
|
248
|
+
if not isinstance(
|
|
249
|
+
child, (AST.HRuleset, AST.DPRuleset, AST.Operator)
|
|
250
|
+
) and not isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
|
|
251
|
+
raise SemanticError("1-2-5")
|
|
252
|
+
result = self.visit(child)
|
|
253
|
+
if isinstance(result, Dataset) and result.name in self.datasets_inputs:
|
|
254
|
+
invalid_dataset_outputs.append(result.name)
|
|
255
|
+
if isinstance(result, Scalar) and result.name in self.scalars_inputs:
|
|
256
|
+
invalid_scalar_outputs.append(result.name)
|
|
257
|
+
|
|
258
|
+
# Reset some handlers (joins and if)
|
|
259
|
+
self.is_from_join = False
|
|
260
|
+
self.condition_stack = None
|
|
261
|
+
self.then_condition_dataset = None
|
|
262
|
+
self.else_condition_dataset = None
|
|
263
|
+
self.nested_condition = False
|
|
264
|
+
|
|
265
|
+
# Reset VirtualCounter
|
|
266
|
+
VirtualCounter.reset()
|
|
267
|
+
|
|
268
|
+
if result is None:
|
|
269
|
+
continue
|
|
270
|
+
|
|
271
|
+
# Removing output dataset
|
|
272
|
+
vtlengine.Exceptions.dataset_output = None
|
|
273
|
+
# Save results
|
|
274
|
+
self.datasets[result.name] = copy(result)
|
|
275
|
+
results[result.name] = result
|
|
276
|
+
if isinstance(result, Scalar):
|
|
277
|
+
scalars_to_save.add(result.name)
|
|
278
|
+
if self.scalars is None:
|
|
279
|
+
self.scalars = {}
|
|
280
|
+
self.scalars[result.name] = copy(result)
|
|
281
|
+
self._save_datapoints_efficient(statement_num)
|
|
282
|
+
statement_num += 1
|
|
283
|
+
if invalid_dataset_outputs:
|
|
284
|
+
raise SemanticError("0-1-2-8", names=", ".join(invalid_dataset_outputs))
|
|
285
|
+
if invalid_scalar_outputs:
|
|
286
|
+
raise SemanticError("0-1-2-8", names=", ".join(invalid_scalar_outputs))
|
|
287
|
+
|
|
288
|
+
if self.output_path is not None and scalars_to_save:
|
|
289
|
+
scalars_filtered = {
|
|
290
|
+
name: self.scalars[name] # type: ignore[index]
|
|
291
|
+
for name in scalars_to_save
|
|
292
|
+
if (not self.return_only_persistent or name in self.ds_analysis.get(PERSISTENT, [])) # type: ignore[union-attr]
|
|
293
|
+
}
|
|
294
|
+
self._save_scalars_efficient(scalars_filtered)
|
|
295
|
+
|
|
296
|
+
return results
|
|
297
|
+
|
|
298
|
+
# Definition Language
|
|
299
|
+
|
|
300
|
+
def visit_Operator(self, node: AST.Operator) -> None:
|
|
301
|
+
if self.udos is None:
|
|
302
|
+
self.udos = {}
|
|
303
|
+
elif node.op in self.udos:
|
|
304
|
+
raise ValueError(f"User Defined Operator {node.op} already exists")
|
|
305
|
+
|
|
306
|
+
param_info: List[Dict[str, Union[str, Type[ScalarType], AST.AST]]] = []
|
|
307
|
+
for param in node.parameters:
|
|
308
|
+
if param.name in [x["name"] for x in param_info]:
|
|
309
|
+
raise ValueError(f"Duplicated Parameter {param.name} in UDO {node.op}")
|
|
310
|
+
# We use a string for model types, but the data type class for basic types
|
|
311
|
+
# (Integer, Number, String, Boolean, ...)
|
|
312
|
+
if isinstance(param.type_, (Dataset, Component, Scalar)):
|
|
313
|
+
type_ = param.type_.__class__.__name__
|
|
314
|
+
else:
|
|
315
|
+
type_ = param.type_
|
|
316
|
+
param_info.append({"name": param.name, "type": type_})
|
|
317
|
+
if param.default is not None:
|
|
318
|
+
param_info[-1]["default"] = param.default
|
|
319
|
+
if len(param_info) > 1:
|
|
320
|
+
previous_default = param_info[0]
|
|
321
|
+
for i in [1, len(param_info) - 1]:
|
|
322
|
+
if previous_default and not param_info[i]:
|
|
323
|
+
raise SemanticError("1-3-12")
|
|
324
|
+
previous_default = param_info[i]
|
|
325
|
+
|
|
326
|
+
self.udos[node.op] = {
|
|
327
|
+
"params": param_info,
|
|
328
|
+
"expression": node.expression,
|
|
329
|
+
"output": node.output_type,
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
def visit_DPRuleset(self, node: AST.DPRuleset) -> None:
|
|
333
|
+
# Rule names are optional, if not provided, they are generated.
|
|
334
|
+
# If provided, all must be provided
|
|
335
|
+
rule_names = [rule.name for rule in node.rules if rule.name is not None]
|
|
336
|
+
if len(rule_names) != 0 and len(node.rules) != len(rule_names):
|
|
337
|
+
raise SemanticError("1-3-1-7", type="Datapoint Ruleset", name=node.name)
|
|
338
|
+
if len(rule_names) == 0:
|
|
339
|
+
for i, rule in enumerate(node.rules):
|
|
340
|
+
rule.name = (i + 1).__str__()
|
|
341
|
+
|
|
342
|
+
if len(rule_names) != len(set(rule_names)):
|
|
343
|
+
not_unique = [name for name in rule_names if rule_names.count(name) > 1]
|
|
344
|
+
raise SemanticError(
|
|
345
|
+
"1-3-1-5",
|
|
346
|
+
type="Datapoint Ruleset",
|
|
347
|
+
names=", ".join(not_unique),
|
|
348
|
+
ruleset_name=node.name,
|
|
349
|
+
)
|
|
350
|
+
|
|
351
|
+
# Signature has the actual parameters names or aliases if provided
|
|
352
|
+
signature_actual_names = {}
|
|
353
|
+
if not isinstance(node.params, AST.DefIdentifier):
|
|
354
|
+
for param in node.params:
|
|
355
|
+
if param.alias is not None:
|
|
356
|
+
signature_actual_names[param.alias] = param.value
|
|
357
|
+
else:
|
|
358
|
+
signature_actual_names[param.value] = param.value
|
|
359
|
+
|
|
360
|
+
ruleset_data = {
|
|
361
|
+
"rules": node.rules,
|
|
362
|
+
"signature": signature_actual_names,
|
|
363
|
+
"params": (
|
|
364
|
+
[x.value for x in node.params]
|
|
365
|
+
if not isinstance(node.params, AST.DefIdentifier)
|
|
366
|
+
else []
|
|
367
|
+
),
|
|
368
|
+
"signature_type": node.signature_type,
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
# Adding the ruleset to the dprs dictionary
|
|
372
|
+
if self.dprs is None:
|
|
373
|
+
self.dprs = {}
|
|
374
|
+
elif node.name in self.dprs:
|
|
375
|
+
raise ValueError(f"Datapoint Ruleset {node.name} already exists")
|
|
376
|
+
|
|
377
|
+
self.dprs[node.name] = ruleset_data
|
|
378
|
+
|
|
379
|
+
def visit_HRuleset(self, node: AST.HRuleset) -> None:
|
|
380
|
+
if self.hrs is None:
|
|
381
|
+
self.hrs = {}
|
|
382
|
+
|
|
383
|
+
if node.name in self.hrs:
|
|
384
|
+
raise ValueError(f"Hierarchical Ruleset {node.name} already exists")
|
|
385
|
+
|
|
386
|
+
rule_names = [rule.name for rule in node.rules if rule.name is not None]
|
|
387
|
+
if len(rule_names) != 0 and len(node.rules) != len(rule_names):
|
|
388
|
+
raise ValueError("All rules must have a name, or none of them")
|
|
389
|
+
if len(rule_names) == 0:
|
|
390
|
+
for i, rule in enumerate(node.rules):
|
|
391
|
+
rule.name = (i + 1).__str__()
|
|
392
|
+
|
|
393
|
+
cond_comp: List[Any] = []
|
|
394
|
+
if isinstance(node.element, list):
|
|
395
|
+
cond_comp = [x.value for x in node.element[:-1]]
|
|
396
|
+
node.element = node.element[-1]
|
|
397
|
+
|
|
398
|
+
signature_actual_name = node.element.value
|
|
399
|
+
|
|
400
|
+
ruleset_data = {
|
|
401
|
+
"rules": node.rules,
|
|
402
|
+
"signature": signature_actual_name,
|
|
403
|
+
"condition": cond_comp,
|
|
404
|
+
"node": node,
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
self.hrs[node.name] = ruleset_data
|
|
408
|
+
|
|
409
|
+
# Execution Language
|
|
410
|
+
def visit_Assignment(self, node: AST.Assignment) -> Any:
|
|
411
|
+
if (
|
|
412
|
+
self.is_from_join
|
|
413
|
+
and isinstance(node.left, AST.Identifier)
|
|
414
|
+
and node.left.kind == "ComponentID"
|
|
415
|
+
):
|
|
416
|
+
self.is_from_component_assignment = True
|
|
417
|
+
self.is_from_assignment = True
|
|
418
|
+
left_operand: str = self.visit(node.left)
|
|
419
|
+
self.is_from_assignment = False
|
|
420
|
+
right_operand: Union[Dataset, DataComponent] = self.visit(node.right)
|
|
421
|
+
self.is_from_component_assignment = False
|
|
422
|
+
result = Assignment.analyze(left_operand, right_operand)
|
|
423
|
+
if isinstance(result, (Dataset, Scalar)):
|
|
424
|
+
result.persistent = isinstance(node, AST.PersistentAssignment)
|
|
425
|
+
return result
|
|
426
|
+
|
|
427
|
+
def visit_PersistentAssignment(self, node: AST.PersistentAssignment) -> Any:
|
|
428
|
+
return self.visit_Assignment(node)
|
|
429
|
+
|
|
430
|
+
def visit_ParFunction(self, node: AST.ParFunction) -> Any:
|
|
431
|
+
return self.visit(node.operand)
|
|
432
|
+
|
|
433
|
+
def visit_BinOp(self, node: AST.BinOp) -> Any:
|
|
434
|
+
is_from_if = False
|
|
435
|
+
if (
|
|
436
|
+
not self.is_from_condition
|
|
437
|
+
and node.op != MEMBERSHIP
|
|
438
|
+
and self.condition_stack is not None
|
|
439
|
+
and len(self.condition_stack) > 0
|
|
440
|
+
):
|
|
441
|
+
is_from_if = self.is_from_if
|
|
442
|
+
self.is_from_if = False
|
|
443
|
+
|
|
444
|
+
if (
|
|
445
|
+
self.is_from_join
|
|
446
|
+
and node.op in [MEMBERSHIP, AGGREGATE]
|
|
447
|
+
and hasattr(node.left, "value")
|
|
448
|
+
and hasattr(node.right, "value")
|
|
449
|
+
):
|
|
450
|
+
if self.udo_params is not None and node.right.value in self.udo_params[-1]:
|
|
451
|
+
comp_name = f"{node.left.value}#{self.udo_params[-1][node.right.value]}"
|
|
452
|
+
else:
|
|
453
|
+
comp_name = f"{node.left.value}#{node.right.value}"
|
|
454
|
+
ast_var_id = AST.VarID(
|
|
455
|
+
value=comp_name,
|
|
456
|
+
line_start=node.right.line_start,
|
|
457
|
+
line_stop=node.right.line_stop,
|
|
458
|
+
column_start=node.right.column_start,
|
|
459
|
+
column_stop=node.right.column_stop,
|
|
460
|
+
)
|
|
461
|
+
return self.visit(ast_var_id)
|
|
462
|
+
left_operand = self.visit(node.left)
|
|
463
|
+
right_operand = self.visit(node.right)
|
|
464
|
+
if is_from_if:
|
|
465
|
+
left_operand, right_operand = self.merge_then_else_datasets(left_operand, right_operand)
|
|
466
|
+
if node.op == MEMBERSHIP:
|
|
467
|
+
if right_operand not in left_operand.components and "#" in right_operand:
|
|
468
|
+
right_operand = right_operand.split("#")[1]
|
|
469
|
+
if self.is_from_component_assignment:
|
|
470
|
+
return BINARY_MAPPING[node.op].analyze(
|
|
471
|
+
left_operand, right_operand, self.is_from_component_assignment
|
|
472
|
+
)
|
|
473
|
+
elif self.is_from_regular_aggregation:
|
|
474
|
+
raise SemanticError("1-1-6-6", dataset_name=left_operand, comp_name=right_operand)
|
|
475
|
+
elif len(left_operand.get_identifiers()) == 0:
|
|
476
|
+
raise SemanticError("1-2-10", op=node.op)
|
|
477
|
+
return BINARY_MAPPING[node.op].analyze(left_operand, right_operand)
|
|
478
|
+
|
|
479
|
+
def visit_UnaryOp(self, node: AST.UnaryOp) -> None:
|
|
480
|
+
operand = self.visit(node.operand)
|
|
481
|
+
if node.op not in UNARY_MAPPING and node.op not in ROLE_SETTER_MAPPING:
|
|
482
|
+
raise NotImplementedError
|
|
483
|
+
if (
|
|
484
|
+
self.is_from_regular_aggregation
|
|
485
|
+
and self.regular_aggregation_dataset is not None
|
|
486
|
+
and node.op in ROLE_SETTER_MAPPING
|
|
487
|
+
):
|
|
488
|
+
if self.regular_aggregation_dataset.data is None:
|
|
489
|
+
data_size = 0
|
|
490
|
+
else:
|
|
491
|
+
data_size = len(self.regular_aggregation_dataset.data)
|
|
492
|
+
return ROLE_SETTER_MAPPING[node.op].analyze(operand, data_size)
|
|
493
|
+
return UNARY_MAPPING[node.op].analyze(operand)
|
|
494
|
+
|
|
495
|
+
def visit_Aggregation(self, node: AST.Aggregation) -> None:
|
|
496
|
+
# Having takes precedence as it is lower in the AST
|
|
497
|
+
if self.is_from_having:
|
|
498
|
+
if node.operand is not None:
|
|
499
|
+
self.visit(node.operand)
|
|
500
|
+
operand = self.aggregation_dataset
|
|
501
|
+
elif self.is_from_regular_aggregation and self.regular_aggregation_dataset is not None:
|
|
502
|
+
operand = self.regular_aggregation_dataset
|
|
503
|
+
if node.operand is not None and operand is not None:
|
|
504
|
+
op_comp: DataComponent = self.visit(node.operand)
|
|
505
|
+
comps_to_keep = {}
|
|
506
|
+
for (
|
|
507
|
+
comp_name,
|
|
508
|
+
comp,
|
|
509
|
+
) in self.regular_aggregation_dataset.components.items():
|
|
510
|
+
if comp.role == Role.IDENTIFIER:
|
|
511
|
+
comps_to_keep[comp_name] = copy(comp)
|
|
512
|
+
comps_to_keep[op_comp.name] = Component(
|
|
513
|
+
name=op_comp.name,
|
|
514
|
+
data_type=op_comp.data_type,
|
|
515
|
+
role=op_comp.role,
|
|
516
|
+
nullable=op_comp.nullable,
|
|
517
|
+
)
|
|
518
|
+
if operand.data is not None:
|
|
519
|
+
data_to_keep = operand.data[operand.get_identifiers_names()]
|
|
520
|
+
data_to_keep[op_comp.name] = op_comp.data
|
|
521
|
+
else:
|
|
522
|
+
data_to_keep = None
|
|
523
|
+
operand = Dataset(name=operand.name, components=comps_to_keep, data=data_to_keep)
|
|
524
|
+
else:
|
|
525
|
+
operand = self.visit(node.operand)
|
|
526
|
+
|
|
527
|
+
if not isinstance(operand, Dataset):
|
|
528
|
+
raise SemanticError("2-3-4", op=node.op, comp="dataset")
|
|
529
|
+
|
|
530
|
+
for comp in operand.components.values():
|
|
531
|
+
if isinstance(comp.data_type, ScalarType):
|
|
532
|
+
raise SemanticError("2-1-12-1", op=node.op)
|
|
533
|
+
|
|
534
|
+
if node.having_clause is not None and node.grouping is None:
|
|
535
|
+
raise SemanticError("1-2-13")
|
|
536
|
+
|
|
537
|
+
groupings: Any = []
|
|
538
|
+
having = None
|
|
539
|
+
grouping_op = node.grouping_op
|
|
540
|
+
if node.grouping is not None:
|
|
541
|
+
if grouping_op == "group all":
|
|
542
|
+
data = None if self.only_semantic else copy(operand.data)
|
|
543
|
+
self.aggregation_dataset = Dataset(
|
|
544
|
+
name=operand.name, components=operand.components, data=data
|
|
545
|
+
)
|
|
546
|
+
# For Component handling in operators like time_agg
|
|
547
|
+
self.is_from_grouping = True
|
|
548
|
+
for x in node.grouping:
|
|
549
|
+
groupings.append(self.visit(x))
|
|
550
|
+
self.is_from_grouping = False
|
|
551
|
+
if grouping_op == "group all":
|
|
552
|
+
comp_grouped = groupings[0]
|
|
553
|
+
if (
|
|
554
|
+
operand.data is not None
|
|
555
|
+
and comp_grouped.data is not None
|
|
556
|
+
and len(comp_grouped.data) > 0
|
|
557
|
+
):
|
|
558
|
+
operand.data[comp_grouped.name] = comp_grouped.data
|
|
559
|
+
groupings = [comp_grouped.name]
|
|
560
|
+
self.aggregation_dataset = None
|
|
561
|
+
if node.having_clause is not None:
|
|
562
|
+
self.aggregation_dataset = Dataset(
|
|
563
|
+
name=operand.name,
|
|
564
|
+
components=deepcopy(operand.components),
|
|
565
|
+
data=pd.DataFrame(columns=operand.get_components_names()),
|
|
566
|
+
)
|
|
567
|
+
self.aggregation_grouping = extract_grouping_identifiers(
|
|
568
|
+
operand.get_identifiers_names(), node.grouping_op, groupings
|
|
569
|
+
)
|
|
570
|
+
self.is_from_having = True
|
|
571
|
+
# Empty data analysis on having - we do not care about the result
|
|
572
|
+
self.visit(node.having_clause)
|
|
573
|
+
# Reset to default values
|
|
574
|
+
self.is_from_having = False
|
|
575
|
+
self.aggregation_grouping = None
|
|
576
|
+
self.aggregation_dataset = None
|
|
577
|
+
having = getattr(node.having_clause, "expr", "")
|
|
578
|
+
having = self._format_having_expression_udo(having)
|
|
579
|
+
|
|
580
|
+
elif self.is_from_having:
|
|
581
|
+
groupings = self.aggregation_grouping
|
|
582
|
+
# Setting here group by as we have already selected the identifiers we need
|
|
583
|
+
grouping_op = "group by"
|
|
584
|
+
|
|
585
|
+
result = AGGREGATION_MAPPING[node.op].analyze(operand, grouping_op, groupings, having)
|
|
586
|
+
if not self.is_from_regular_aggregation:
|
|
587
|
+
result.name = VirtualCounter._new_ds_name()
|
|
588
|
+
return result
|
|
589
|
+
|
|
590
|
+
def _format_having_expression_udo(self, having: str) -> str:
|
|
591
|
+
if self.udo_params is None:
|
|
592
|
+
return having
|
|
593
|
+
for k, v in self.udo_params[-1].items():
|
|
594
|
+
old_param = None
|
|
595
|
+
if f"{k} " in having:
|
|
596
|
+
old_param = f"{k} "
|
|
597
|
+
elif f" {k}" in having:
|
|
598
|
+
old_param = f" {k}"
|
|
599
|
+
if old_param is not None:
|
|
600
|
+
if isinstance(v, str):
|
|
601
|
+
new_param = f" {v}"
|
|
602
|
+
elif isinstance(v, (Dataset, Scalar)):
|
|
603
|
+
new_param = f" {v.name}"
|
|
604
|
+
else:
|
|
605
|
+
new_param = f" {v.value}"
|
|
606
|
+
having = having.replace(old_param, new_param)
|
|
607
|
+
return having
|
|
608
|
+
|
|
609
|
+
def visit_Analytic(self, node: AST.Analytic) -> Any: # noqa: C901
|
|
610
|
+
component_name = None
|
|
611
|
+
if self.is_from_regular_aggregation:
|
|
612
|
+
if self.regular_aggregation_dataset is None:
|
|
613
|
+
raise SemanticError("1-1-6-10")
|
|
614
|
+
if node.operand is None:
|
|
615
|
+
operand = self.regular_aggregation_dataset
|
|
616
|
+
else:
|
|
617
|
+
operand_comp = self.visit(node.operand)
|
|
618
|
+
component_name = operand_comp.name
|
|
619
|
+
id_names = self.regular_aggregation_dataset.get_identifiers_names()
|
|
620
|
+
measure_names = self.regular_aggregation_dataset.get_measures_names()
|
|
621
|
+
attribute_names = self.regular_aggregation_dataset.get_attributes_names()
|
|
622
|
+
dataset_components = self.regular_aggregation_dataset.components.copy()
|
|
623
|
+
for name in measure_names + attribute_names:
|
|
624
|
+
dataset_components.pop(name)
|
|
625
|
+
|
|
626
|
+
dataset_components[operand_comp.name] = Component(
|
|
627
|
+
name=operand_comp.name,
|
|
628
|
+
data_type=operand_comp.data_type,
|
|
629
|
+
role=operand_comp.role,
|
|
630
|
+
nullable=operand_comp.nullable,
|
|
631
|
+
)
|
|
632
|
+
|
|
633
|
+
if self.only_semantic or self.regular_aggregation_dataset.data is None:
|
|
634
|
+
data = None
|
|
635
|
+
else:
|
|
636
|
+
data = self.regular_aggregation_dataset.data[id_names].copy()
|
|
637
|
+
data[operand_comp.name] = operand_comp.data
|
|
638
|
+
|
|
639
|
+
operand = Dataset(
|
|
640
|
+
name=self.regular_aggregation_dataset.name,
|
|
641
|
+
components=dataset_components,
|
|
642
|
+
data=data,
|
|
643
|
+
)
|
|
644
|
+
|
|
645
|
+
else:
|
|
646
|
+
operand = self.visit(node.operand)
|
|
647
|
+
partitioning: Any = []
|
|
648
|
+
ordering = []
|
|
649
|
+
if self.udo_params is not None:
|
|
650
|
+
if node.partition_by is not None:
|
|
651
|
+
for comp_name in node.partition_by:
|
|
652
|
+
if comp_name in self.udo_params[-1]:
|
|
653
|
+
partitioning.append(self.udo_params[-1][comp_name])
|
|
654
|
+
elif comp_name in operand.get_identifiers_names():
|
|
655
|
+
partitioning.append(comp_name)
|
|
656
|
+
else:
|
|
657
|
+
raise SemanticError(
|
|
658
|
+
"2-3-9",
|
|
659
|
+
comp_type="Component",
|
|
660
|
+
comp_name=comp_name,
|
|
661
|
+
param="UDO parameters",
|
|
662
|
+
)
|
|
663
|
+
if node.order_by is not None:
|
|
664
|
+
for o in node.order_by:
|
|
665
|
+
if o.component in self.udo_params[-1]:
|
|
666
|
+
o.component = self.udo_params[-1][o.component]
|
|
667
|
+
elif o.component not in operand.get_identifiers_names():
|
|
668
|
+
raise SemanticError(
|
|
669
|
+
"2-3-9",
|
|
670
|
+
comp_type="Component",
|
|
671
|
+
comp_name=o.component,
|
|
672
|
+
param="UDO parameters",
|
|
673
|
+
)
|
|
674
|
+
ordering = node.order_by
|
|
675
|
+
|
|
676
|
+
else:
|
|
677
|
+
partitioning = node.partition_by
|
|
678
|
+
ordering = node.order_by if node.order_by is not None else []
|
|
679
|
+
if not isinstance(operand, Dataset):
|
|
680
|
+
raise SemanticError("2-3-4", op=node.op, comp="dataset")
|
|
681
|
+
if node.partition_by is None:
|
|
682
|
+
order_components = (
|
|
683
|
+
[x.component for x in node.order_by] if node.order_by is not None else []
|
|
684
|
+
)
|
|
685
|
+
partitioning = [x for x in operand.get_identifiers_names() if x not in order_components]
|
|
686
|
+
|
|
687
|
+
params = []
|
|
688
|
+
if node.params is not None:
|
|
689
|
+
for param in node.params:
|
|
690
|
+
if isinstance(param, AST.Constant):
|
|
691
|
+
params.append(param.value)
|
|
692
|
+
else:
|
|
693
|
+
params.append(param)
|
|
694
|
+
|
|
695
|
+
result = ANALYTIC_MAPPING[node.op].analyze(
|
|
696
|
+
operand=operand,
|
|
697
|
+
partitioning=partitioning,
|
|
698
|
+
ordering=ordering,
|
|
699
|
+
window=node.window,
|
|
700
|
+
params=params,
|
|
701
|
+
component_name=component_name,
|
|
702
|
+
)
|
|
703
|
+
if not self.is_from_regular_aggregation:
|
|
704
|
+
return result
|
|
705
|
+
|
|
706
|
+
# Extracting the components we need (only identifiers)
|
|
707
|
+
id_columns = (
|
|
708
|
+
self.regular_aggregation_dataset.get_identifiers_names()
|
|
709
|
+
if (self.regular_aggregation_dataset is not None)
|
|
710
|
+
else None
|
|
711
|
+
)
|
|
712
|
+
|
|
713
|
+
# # Extracting the component we need (only measure)
|
|
714
|
+
if component_name is None or node.op == COUNT:
|
|
715
|
+
measure_name = result.get_measures_names()[0]
|
|
716
|
+
else:
|
|
717
|
+
measure_name = component_name
|
|
718
|
+
# Joining the result with the original dataset
|
|
719
|
+
if self.only_semantic:
|
|
720
|
+
data = None
|
|
721
|
+
else:
|
|
722
|
+
if (
|
|
723
|
+
self.regular_aggregation_dataset is not None
|
|
724
|
+
and self.regular_aggregation_dataset.data is not None
|
|
725
|
+
):
|
|
726
|
+
joined_result = pd.merge(
|
|
727
|
+
self.regular_aggregation_dataset.data[id_columns],
|
|
728
|
+
result.data,
|
|
729
|
+
on=id_columns,
|
|
730
|
+
how="inner",
|
|
731
|
+
)
|
|
732
|
+
data = joined_result[measure_name]
|
|
733
|
+
else:
|
|
734
|
+
data = None
|
|
735
|
+
|
|
736
|
+
return DataComponent(
|
|
737
|
+
name=measure_name,
|
|
738
|
+
data=data,
|
|
739
|
+
data_type=result.components[measure_name].data_type,
|
|
740
|
+
role=result.components[measure_name].role,
|
|
741
|
+
nullable=result.components[measure_name].nullable,
|
|
742
|
+
)
|
|
743
|
+
|
|
744
|
+
def visit_MulOp(self, node: AST.MulOp) -> None:
|
|
745
|
+
"""
|
|
746
|
+
MulOp: (op, children)
|
|
747
|
+
|
|
748
|
+
op: BETWEEN : 'between'.
|
|
749
|
+
|
|
750
|
+
Basic usage:
|
|
751
|
+
|
|
752
|
+
for child in node.children:
|
|
753
|
+
self.visit(child)
|
|
754
|
+
"""
|
|
755
|
+
# Comparison Operators
|
|
756
|
+
if node.op == BETWEEN:
|
|
757
|
+
operand_element = self.visit(node.children[0])
|
|
758
|
+
from_element = self.visit(node.children[1])
|
|
759
|
+
to_element = self.visit(node.children[2])
|
|
760
|
+
|
|
761
|
+
return Between.analyze(operand_element, from_element, to_element)
|
|
762
|
+
|
|
763
|
+
# Comparison Operators
|
|
764
|
+
elif node.op == EXISTS_IN:
|
|
765
|
+
dataset_1 = self.visit(node.children[0])
|
|
766
|
+
if not isinstance(dataset_1, Dataset):
|
|
767
|
+
raise SemanticError("2-3-11", pos="First")
|
|
768
|
+
dataset_2 = self.visit(node.children[1])
|
|
769
|
+
if not isinstance(dataset_2, Dataset):
|
|
770
|
+
raise SemanticError("2-3-11", pos="Second")
|
|
771
|
+
|
|
772
|
+
retain_element = None
|
|
773
|
+
if len(node.children) == 3:
|
|
774
|
+
retain_element = self.visit(node.children[2])
|
|
775
|
+
if isinstance(retain_element, Scalar):
|
|
776
|
+
retain_element = retain_element.value
|
|
777
|
+
if retain_element == ALL:
|
|
778
|
+
retain_element = None
|
|
779
|
+
|
|
780
|
+
return ExistIn.analyze(dataset_1, dataset_2, retain_element)
|
|
781
|
+
|
|
782
|
+
# Set Operators.
|
|
783
|
+
elif node.op in SET_MAPPING:
|
|
784
|
+
datasets = []
|
|
785
|
+
for child in node.children:
|
|
786
|
+
datasets.append(self.visit(child))
|
|
787
|
+
|
|
788
|
+
for ds in datasets:
|
|
789
|
+
if not isinstance(ds, Dataset):
|
|
790
|
+
raise ValueError(f"Expected dataset, got {type(ds).__name__}")
|
|
791
|
+
|
|
792
|
+
return SET_MAPPING[node.op].analyze(datasets)
|
|
793
|
+
|
|
794
|
+
elif node.op == CURRENT_DATE:
|
|
795
|
+
return Current_Date.analyze()
|
|
796
|
+
|
|
797
|
+
else:
|
|
798
|
+
raise SemanticError("1-3-5", op_type="MulOp", node_op=node.op)
|
|
799
|
+
|
|
800
|
+
def visit_VarID(self, node: AST.VarID) -> Any: # noqa: C901
|
|
801
|
+
if self.is_from_assignment:
|
|
802
|
+
return node.value
|
|
803
|
+
# Having takes precedence as it is lower in the AST
|
|
804
|
+
if self.udo_params is not None and node.value in self.udo_params[-1]:
|
|
805
|
+
udo_element = copy(self.udo_params[-1][node.value])
|
|
806
|
+
if isinstance(udo_element, (Scalar, Dataset, DataComponent)):
|
|
807
|
+
return udo_element
|
|
808
|
+
# If it is only the component or dataset name, we rename the node.value
|
|
809
|
+
node.value = udo_element
|
|
810
|
+
if self.aggregation_dataset is not None and (self.is_from_having or self.is_from_grouping):
|
|
811
|
+
if node.value not in self.aggregation_dataset.components:
|
|
812
|
+
raise SemanticError(
|
|
813
|
+
"1-1-1-10",
|
|
814
|
+
op=None,
|
|
815
|
+
comp_name=node.value,
|
|
816
|
+
dataset_name=self.aggregation_dataset.name,
|
|
817
|
+
)
|
|
818
|
+
if self.aggregation_dataset.data is None:
|
|
819
|
+
data = None
|
|
820
|
+
else:
|
|
821
|
+
data = copy(self.aggregation_dataset.data[node.value])
|
|
822
|
+
return DataComponent(
|
|
823
|
+
name=node.value,
|
|
824
|
+
data=data,
|
|
825
|
+
data_type=self.aggregation_dataset.components[node.value].data_type,
|
|
826
|
+
role=self.aggregation_dataset.components[node.value].role,
|
|
827
|
+
nullable=self.aggregation_dataset.components[node.value].nullable,
|
|
828
|
+
)
|
|
829
|
+
if self.is_from_regular_aggregation:
|
|
830
|
+
if self.is_from_join and node.value in self.datasets:
|
|
831
|
+
return copy(self.datasets[node.value])
|
|
832
|
+
if self.regular_aggregation_dataset is not None:
|
|
833
|
+
if self.scalars is not None and node.value in self.scalars:
|
|
834
|
+
if node.value in self.regular_aggregation_dataset.components:
|
|
835
|
+
raise SemanticError("1-1-6-11", comp_name=node.value)
|
|
836
|
+
return copy(self.scalars[node.value])
|
|
837
|
+
if self.regular_aggregation_dataset.data is not None:
|
|
838
|
+
if (
|
|
839
|
+
self.is_from_join
|
|
840
|
+
and node.value
|
|
841
|
+
not in self.regular_aggregation_dataset.get_components_names()
|
|
842
|
+
):
|
|
843
|
+
is_partial_present = 0
|
|
844
|
+
found_comp = None
|
|
845
|
+
for comp_name in self.regular_aggregation_dataset.get_components_names():
|
|
846
|
+
if (
|
|
847
|
+
"#" in comp_name
|
|
848
|
+
and comp_name.split("#")[1] == node.value
|
|
849
|
+
or "#" in node.value
|
|
850
|
+
and node.value.split("#")[1] == comp_name
|
|
851
|
+
):
|
|
852
|
+
is_partial_present += 1
|
|
853
|
+
found_comp = comp_name
|
|
854
|
+
if is_partial_present == 0:
|
|
855
|
+
raise SemanticError(
|
|
856
|
+
"1-1-1-10",
|
|
857
|
+
comp_name=node.value,
|
|
858
|
+
dataset_name=self.regular_aggregation_dataset.name,
|
|
859
|
+
)
|
|
860
|
+
elif is_partial_present == 2:
|
|
861
|
+
raise SemanticError("1-1-13-9", comp_name=node.value)
|
|
862
|
+
node.value = found_comp # type:ignore[assignment]
|
|
863
|
+
if node.value not in self.regular_aggregation_dataset.components:
|
|
864
|
+
raise SemanticError(
|
|
865
|
+
"1-1-1-10",
|
|
866
|
+
comp_name=node.value,
|
|
867
|
+
dataset_name=self.regular_aggregation_dataset.name,
|
|
868
|
+
)
|
|
869
|
+
data = copy(self.regular_aggregation_dataset.data[node.value])
|
|
870
|
+
else:
|
|
871
|
+
data = None
|
|
872
|
+
return DataComponent(
|
|
873
|
+
name=node.value,
|
|
874
|
+
data=data,
|
|
875
|
+
data_type=self.regular_aggregation_dataset.components[node.value].data_type,
|
|
876
|
+
role=self.regular_aggregation_dataset.components[node.value].role,
|
|
877
|
+
nullable=self.regular_aggregation_dataset.components[node.value].nullable,
|
|
878
|
+
)
|
|
879
|
+
if (
|
|
880
|
+
self.is_from_rule
|
|
881
|
+
and self.ruleset_dataset is not None
|
|
882
|
+
and self.ruleset_signature is not None
|
|
883
|
+
):
|
|
884
|
+
if node.value not in self.ruleset_signature:
|
|
885
|
+
raise SemanticError("1-1-10-7", comp_name=node.value)
|
|
886
|
+
comp_name = self.ruleset_signature[node.value]
|
|
887
|
+
if comp_name not in self.ruleset_dataset.components:
|
|
888
|
+
raise SemanticError(
|
|
889
|
+
"1-1-1-10",
|
|
890
|
+
comp_name=node.value,
|
|
891
|
+
dataset_name=self.ruleset_dataset.name,
|
|
892
|
+
)
|
|
893
|
+
data = None if self.rule_data is None else self.rule_data[comp_name]
|
|
894
|
+
return DataComponent(
|
|
895
|
+
name=comp_name,
|
|
896
|
+
data=data,
|
|
897
|
+
data_type=self.ruleset_dataset.components[comp_name].data_type,
|
|
898
|
+
role=self.ruleset_dataset.components[comp_name].role,
|
|
899
|
+
nullable=self.ruleset_dataset.components[comp_name].nullable,
|
|
900
|
+
)
|
|
901
|
+
if self.scalars and node.value in self.scalars:
|
|
902
|
+
return copy(self.scalars[node.value])
|
|
903
|
+
if node.value not in self.datasets:
|
|
904
|
+
raise SemanticError("2-3-6", dataset_name=node.value)
|
|
905
|
+
|
|
906
|
+
return copy(self.datasets[node.value])
|
|
907
|
+
|
|
908
|
+
def visit_Collection(self, node: AST.Collection) -> Any:
|
|
909
|
+
if node.kind == "Set":
|
|
910
|
+
elements = []
|
|
911
|
+
duplicates = []
|
|
912
|
+
for child in node.children:
|
|
913
|
+
ref_element = child.children[1] if isinstance(child, AST.ParamOp) else child
|
|
914
|
+
if ref_element in elements:
|
|
915
|
+
duplicates.append(ref_element)
|
|
916
|
+
elements.append(self.visit(child).value)
|
|
917
|
+
if len(duplicates) > 0:
|
|
918
|
+
raise SemanticError("1-2-5", duplicates=duplicates)
|
|
919
|
+
for element in elements:
|
|
920
|
+
if type(element) is not type(elements[0]):
|
|
921
|
+
raise Exception("All elements in a set must be of the same type")
|
|
922
|
+
if len(elements) == 0:
|
|
923
|
+
raise Exception("A set must contain at least one element")
|
|
924
|
+
if len(elements) != len(set(elements)):
|
|
925
|
+
raise Exception("A set must not contain duplicates")
|
|
926
|
+
return ScalarSet(data_type=BASIC_TYPES[type(elements[0])], values=elements)
|
|
927
|
+
elif node.kind == "ValueDomain":
|
|
928
|
+
if self.value_domains is None:
|
|
929
|
+
raise SemanticError("2-3-10", comp_type="Value Domains")
|
|
930
|
+
if node.name not in self.value_domains:
|
|
931
|
+
raise SemanticError("1-2-8", name=node.name)
|
|
932
|
+
vd = self.value_domains[node.name]
|
|
933
|
+
return ScalarSet(data_type=vd.type, values=vd.setlist)
|
|
934
|
+
else:
|
|
935
|
+
raise SemanticError("1-2-9", name=node.name)
|
|
936
|
+
|
|
937
|
+
def visit_RegularAggregation(self, node: AST.RegularAggregation) -> None: # noqa: C901
|
|
938
|
+
operands = []
|
|
939
|
+
dataset = self.visit(node.dataset)
|
|
940
|
+
if isinstance(dataset, Scalar):
|
|
941
|
+
raise SemanticError("1-1-1-20", op=node.op)
|
|
942
|
+
self.regular_aggregation_dataset = dataset
|
|
943
|
+
if node.op == APPLY:
|
|
944
|
+
op_map = BINARY_MAPPING
|
|
945
|
+
return REGULAR_AGGREGATION_MAPPING[node.op].analyze(dataset, node.children, op_map)
|
|
946
|
+
for child in node.children:
|
|
947
|
+
self.is_from_regular_aggregation = True
|
|
948
|
+
operands.append(self.visit(child))
|
|
949
|
+
self.is_from_regular_aggregation = False
|
|
950
|
+
if node.op == CALC and any(isinstance(operand, Dataset) for operand in operands):
|
|
951
|
+
raise SemanticError("1-2-14", op=node.op)
|
|
952
|
+
if node.op == AGGREGATE:
|
|
953
|
+
# Extracting the role encoded inside the children assignments
|
|
954
|
+
role_info = {
|
|
955
|
+
child.left.value: child.left.role
|
|
956
|
+
for child in node.children
|
|
957
|
+
if hasattr(child, "left")
|
|
958
|
+
}
|
|
959
|
+
dataset = copy(operands[0])
|
|
960
|
+
if self.regular_aggregation_dataset is not None:
|
|
961
|
+
dataset.name = self.regular_aggregation_dataset.name
|
|
962
|
+
dataset.components = {
|
|
963
|
+
comp_name: comp
|
|
964
|
+
for comp_name, comp in dataset.components.items()
|
|
965
|
+
if comp.role != Role.MEASURE
|
|
966
|
+
}
|
|
967
|
+
if dataset.data is not None:
|
|
968
|
+
dataset.data = dataset.data[dataset.get_identifiers_names()]
|
|
969
|
+
aux_operands = []
|
|
970
|
+
for operand in operands:
|
|
971
|
+
measure = operand.get_component(operand.get_measures_names()[0])
|
|
972
|
+
data = operand.data[measure.name] if operand.data is not None else None
|
|
973
|
+
# Getting role from encoded information
|
|
974
|
+
# (handling also UDO params as it is present in the value of the mapping)
|
|
975
|
+
if self.udo_params is not None and operand.name in self.udo_params[-1].values():
|
|
976
|
+
role = None
|
|
977
|
+
for k, v in self.udo_params[-1].items():
|
|
978
|
+
if isinstance(v, str) and v == operand.name:
|
|
979
|
+
role_key = k
|
|
980
|
+
role = role_info[role_key]
|
|
981
|
+
else:
|
|
982
|
+
role = role_info[operand.name]
|
|
983
|
+
aux_operands.append(
|
|
984
|
+
DataComponent(
|
|
985
|
+
name=operand.name,
|
|
986
|
+
data=data,
|
|
987
|
+
data_type=measure.data_type,
|
|
988
|
+
role=role if role is not None else measure.role,
|
|
989
|
+
nullable=measure.nullable,
|
|
990
|
+
)
|
|
991
|
+
)
|
|
992
|
+
operands = aux_operands
|
|
993
|
+
self.regular_aggregation_dataset = None
|
|
994
|
+
if node.op == FILTER:
|
|
995
|
+
if not isinstance(operands[0], DataComponent) and hasattr(child, "left"):
|
|
996
|
+
measure = child.left.value
|
|
997
|
+
operands[0] = DataComponent(
|
|
998
|
+
name=measure,
|
|
999
|
+
data=operands[0].data[measure],
|
|
1000
|
+
data_type=operands[0].components[measure].data_type,
|
|
1001
|
+
role=operands[0].components[measure].role,
|
|
1002
|
+
nullable=operands[0].components[measure].nullable,
|
|
1003
|
+
)
|
|
1004
|
+
return REGULAR_AGGREGATION_MAPPING[node.op].analyze(operands[0], dataset)
|
|
1005
|
+
if self.is_from_join:
|
|
1006
|
+
if node.op in [DROP, KEEP]:
|
|
1007
|
+
operands = [
|
|
1008
|
+
(
|
|
1009
|
+
operand.get_measures_names()
|
|
1010
|
+
if isinstance(operand, Dataset)
|
|
1011
|
+
else (
|
|
1012
|
+
operand.name
|
|
1013
|
+
if isinstance(operand, DataComponent)
|
|
1014
|
+
and operand.role is not Role.IDENTIFIER
|
|
1015
|
+
else operand
|
|
1016
|
+
)
|
|
1017
|
+
)
|
|
1018
|
+
for operand in operands
|
|
1019
|
+
]
|
|
1020
|
+
operands = list(
|
|
1021
|
+
set(
|
|
1022
|
+
[
|
|
1023
|
+
item
|
|
1024
|
+
for sublist in operands
|
|
1025
|
+
for item in (sublist if isinstance(sublist, list) else [sublist])
|
|
1026
|
+
]
|
|
1027
|
+
)
|
|
1028
|
+
)
|
|
1029
|
+
result = REGULAR_AGGREGATION_MAPPING[node.op].analyze(operands, dataset)
|
|
1030
|
+
if node.isLast:
|
|
1031
|
+
if result.data is not None:
|
|
1032
|
+
result.data.rename(
|
|
1033
|
+
columns={col: col[col.find("#") + 1 :] for col in result.data.columns},
|
|
1034
|
+
inplace=True,
|
|
1035
|
+
)
|
|
1036
|
+
result.components = {
|
|
1037
|
+
comp_name[comp_name.find("#") + 1 :]: comp
|
|
1038
|
+
for comp_name, comp in result.components.items()
|
|
1039
|
+
}
|
|
1040
|
+
for comp in result.components.values():
|
|
1041
|
+
comp.name = comp.name[comp.name.find("#") + 1 :]
|
|
1042
|
+
if result.data is not None:
|
|
1043
|
+
result.data.reset_index(drop=True, inplace=True)
|
|
1044
|
+
self.is_from_join = False
|
|
1045
|
+
return result
|
|
1046
|
+
return REGULAR_AGGREGATION_MAPPING[node.op].analyze(operands, dataset)
|
|
1047
|
+
|
|
1048
|
+
def visit_If(self, node: AST.If) -> Dataset:
|
|
1049
|
+
self.is_from_condition = True
|
|
1050
|
+
condition = self.visit(node.condition)
|
|
1051
|
+
self.is_from_condition = False
|
|
1052
|
+
|
|
1053
|
+
if isinstance(condition, Scalar):
|
|
1054
|
+
thenValue = self.visit(node.thenOp)
|
|
1055
|
+
elseValue = self.visit(node.elseOp)
|
|
1056
|
+
if not isinstance(thenValue, Scalar) or not isinstance(elseValue, Scalar):
|
|
1057
|
+
raise SemanticError(
|
|
1058
|
+
"1-1-9-3",
|
|
1059
|
+
op="If_op",
|
|
1060
|
+
then_name=thenValue.name,
|
|
1061
|
+
else_name=elseValue.name,
|
|
1062
|
+
)
|
|
1063
|
+
if condition.value:
|
|
1064
|
+
return self.visit(node.thenOp)
|
|
1065
|
+
else:
|
|
1066
|
+
return self.visit(node.elseOp)
|
|
1067
|
+
|
|
1068
|
+
# Analysis for data component and dataset
|
|
1069
|
+
else:
|
|
1070
|
+
if self.condition_stack is None:
|
|
1071
|
+
self.condition_stack = []
|
|
1072
|
+
if self.then_condition_dataset is None:
|
|
1073
|
+
self.then_condition_dataset = []
|
|
1074
|
+
if self.else_condition_dataset is None:
|
|
1075
|
+
self.else_condition_dataset = []
|
|
1076
|
+
self.generate_then_else_datasets(copy(condition))
|
|
1077
|
+
|
|
1078
|
+
self.condition_stack.append(THEN_ELSE["then"])
|
|
1079
|
+
self.is_from_if = True
|
|
1080
|
+
self.nested_condition = "T" if isinstance(node.thenOp, AST.If) else False
|
|
1081
|
+
thenOp = self.visit(node.thenOp)
|
|
1082
|
+
if isinstance(thenOp, Scalar) or not isinstance(node.thenOp, AST.BinOp):
|
|
1083
|
+
self.then_condition_dataset.pop()
|
|
1084
|
+
self.condition_stack.pop()
|
|
1085
|
+
|
|
1086
|
+
self.condition_stack.append(THEN_ELSE["else"])
|
|
1087
|
+
self.is_from_if = True
|
|
1088
|
+
self.nested_condition = "E" if isinstance(node.elseOp, AST.If) else False
|
|
1089
|
+
elseOp = self.visit(node.elseOp)
|
|
1090
|
+
if isinstance(elseOp, Scalar) or (
|
|
1091
|
+
not isinstance(node.elseOp, AST.BinOp) and not isinstance(node.elseOp, AST.If)
|
|
1092
|
+
):
|
|
1093
|
+
if len(self.else_condition_dataset) > 0:
|
|
1094
|
+
self.else_condition_dataset.pop()
|
|
1095
|
+
if len(self.condition_stack) > 0:
|
|
1096
|
+
self.condition_stack.pop()
|
|
1097
|
+
|
|
1098
|
+
return If.analyze(condition, thenOp, elseOp)
|
|
1099
|
+
|
|
1100
|
+
def visit_Case(self, node: AST.Case) -> Any:
|
|
1101
|
+
conditions: List[Any] = []
|
|
1102
|
+
thenOps: List[Any] = []
|
|
1103
|
+
|
|
1104
|
+
if self.condition_stack is None:
|
|
1105
|
+
self.condition_stack = []
|
|
1106
|
+
if self.then_condition_dataset is None:
|
|
1107
|
+
self.then_condition_dataset = []
|
|
1108
|
+
if self.else_condition_dataset is None:
|
|
1109
|
+
self.else_condition_dataset = []
|
|
1110
|
+
|
|
1111
|
+
for case in node.cases:
|
|
1112
|
+
self.is_from_condition = True
|
|
1113
|
+
cond = self.visit(case.condition)
|
|
1114
|
+
self.is_from_condition = False
|
|
1115
|
+
|
|
1116
|
+
conditions.append(cond)
|
|
1117
|
+
if isinstance(cond, Scalar):
|
|
1118
|
+
then_result = self.visit(case.thenOp)
|
|
1119
|
+
thenOps.append(then_result)
|
|
1120
|
+
continue
|
|
1121
|
+
|
|
1122
|
+
self.generate_then_else_datasets(copy(cond))
|
|
1123
|
+
|
|
1124
|
+
self.condition_stack.append(THEN_ELSE["then"])
|
|
1125
|
+
self.is_from_if = True
|
|
1126
|
+
self.is_from_case_then = True
|
|
1127
|
+
|
|
1128
|
+
then_result = self.visit(case.thenOp)
|
|
1129
|
+
thenOps.append(then_result)
|
|
1130
|
+
|
|
1131
|
+
self.is_from_case_then = False
|
|
1132
|
+
self.is_from_if = False
|
|
1133
|
+
if len(self.condition_stack) > 0:
|
|
1134
|
+
self.condition_stack.pop()
|
|
1135
|
+
if len(self.then_condition_dataset) > 0:
|
|
1136
|
+
self.then_condition_dataset.pop()
|
|
1137
|
+
if len(self.else_condition_dataset) > 0:
|
|
1138
|
+
self.else_condition_dataset.pop()
|
|
1139
|
+
|
|
1140
|
+
elseOp = self.visit(node.elseOp)
|
|
1141
|
+
|
|
1142
|
+
return Case.analyze(conditions, thenOps, elseOp)
|
|
1143
|
+
|
|
1144
|
+
def visit_RenameNode(self, node: AST.RenameNode) -> Any:
|
|
1145
|
+
if self.udo_params is not None:
|
|
1146
|
+
if "#" in node.old_name:
|
|
1147
|
+
if node.old_name.split("#")[1] in self.udo_params[-1]:
|
|
1148
|
+
comp_name = self.udo_params[-1][node.old_name.split("#")[1]]
|
|
1149
|
+
node.old_name = f"{node.old_name.split('#')[0]}#{comp_name}"
|
|
1150
|
+
else:
|
|
1151
|
+
if node.old_name in self.udo_params[-1]:
|
|
1152
|
+
node.old_name = self.udo_params[-1][node.old_name]
|
|
1153
|
+
|
|
1154
|
+
if (
|
|
1155
|
+
self.is_from_join
|
|
1156
|
+
and self.regular_aggregation_dataset is not None
|
|
1157
|
+
and node.old_name not in self.regular_aggregation_dataset.components
|
|
1158
|
+
):
|
|
1159
|
+
node.old_name = node.old_name.split("#")[1]
|
|
1160
|
+
|
|
1161
|
+
return node
|
|
1162
|
+
|
|
1163
|
+
def visit_Constant(self, node: AST.Constant) -> Any:
|
|
1164
|
+
return Scalar(
|
|
1165
|
+
name=str(node.value),
|
|
1166
|
+
value=node.value,
|
|
1167
|
+
data_type=BASIC_TYPES[type(node.value)],
|
|
1168
|
+
)
|
|
1169
|
+
|
|
1170
|
+
def visit_JoinOp(self, node: AST.JoinOp) -> None:
|
|
1171
|
+
clause_elements = []
|
|
1172
|
+
for clause in node.clauses:
|
|
1173
|
+
clause_elements.append(self.visit(clause))
|
|
1174
|
+
if hasattr(clause, "op") and clause.op == AS:
|
|
1175
|
+
# TODO: We need to delete somewhere the join datasets with alias that are added here
|
|
1176
|
+
self.datasets[clause_elements[-1].name] = clause_elements[-1]
|
|
1177
|
+
|
|
1178
|
+
# No need to check using, regular aggregation is executed afterwards
|
|
1179
|
+
self.is_from_join = True
|
|
1180
|
+
return JOIN_MAPPING[node.op].analyze(clause_elements, node.using)
|
|
1181
|
+
|
|
1182
|
+
def visit_ParamConstant(self, node: AST.ParamConstant) -> str:
|
|
1183
|
+
return node.value
|
|
1184
|
+
|
|
1185
|
+
def visit_ParamOp(self, node: AST.ParamOp) -> None: # noqa: C901
|
|
1186
|
+
if node.op == ROUND:
|
|
1187
|
+
op_element = self.visit(node.children[0])
|
|
1188
|
+
param_element = self.visit(node.params[0]) if len(node.params) != 0 else None
|
|
1189
|
+
return Round.analyze(op_element, param_element)
|
|
1190
|
+
|
|
1191
|
+
# Numeric Operator
|
|
1192
|
+
elif node.op == TRUNC:
|
|
1193
|
+
op_element = self.visit(node.children[0])
|
|
1194
|
+
param_element = None
|
|
1195
|
+
if len(node.params) != 0:
|
|
1196
|
+
param_element = self.visit(node.params[0])
|
|
1197
|
+
|
|
1198
|
+
return Trunc.analyze(op_element, param_element)
|
|
1199
|
+
|
|
1200
|
+
elif node.op == SUBSTR or node.op == REPLACE or node.op == INSTR:
|
|
1201
|
+
params = [None, None, None]
|
|
1202
|
+
op_element = self.visit(node.children[0])
|
|
1203
|
+
for i, node_param in enumerate(node.params):
|
|
1204
|
+
params[i] = self.visit(node_param)
|
|
1205
|
+
param1, param2, param3 = tuple(params)
|
|
1206
|
+
if node.op == SUBSTR:
|
|
1207
|
+
return Substr.analyze(op_element, param1, param2)
|
|
1208
|
+
elif node.op == REPLACE:
|
|
1209
|
+
return Replace.analyze(op_element, param1, param2)
|
|
1210
|
+
elif node.op == INSTR:
|
|
1211
|
+
return Instr.analyze(op_element, param1, param2, param3)
|
|
1212
|
+
else:
|
|
1213
|
+
raise NotImplementedError
|
|
1214
|
+
elif node.op == HAVING:
|
|
1215
|
+
if self.aggregation_dataset is not None and self.aggregation_grouping is not None:
|
|
1216
|
+
for id_name in self.aggregation_grouping:
|
|
1217
|
+
if id_name not in self.aggregation_dataset.components:
|
|
1218
|
+
raise SemanticError("1-1-2-4", op=node.op, id_name=id_name)
|
|
1219
|
+
if len(self.aggregation_dataset.get_measures()) != 1:
|
|
1220
|
+
raise ValueError("Only one measure is allowed")
|
|
1221
|
+
# Deepcopy is necessary for components to avoid changing the original dataset
|
|
1222
|
+
self.aggregation_dataset.components = {
|
|
1223
|
+
comp_name: deepcopy(comp)
|
|
1224
|
+
for comp_name, comp in self.aggregation_dataset.components.items()
|
|
1225
|
+
if comp_name in self.aggregation_grouping or comp.role == Role.MEASURE
|
|
1226
|
+
}
|
|
1227
|
+
|
|
1228
|
+
self.aggregation_dataset.data = (
|
|
1229
|
+
self.aggregation_dataset.data[
|
|
1230
|
+
self.aggregation_dataset.get_identifiers_names()
|
|
1231
|
+
+ self.aggregation_dataset.get_measures_names()
|
|
1232
|
+
]
|
|
1233
|
+
if (self.aggregation_dataset.data is not None)
|
|
1234
|
+
else None
|
|
1235
|
+
)
|
|
1236
|
+
result = self.visit(node.params)
|
|
1237
|
+
measure = result.get_measures()[0]
|
|
1238
|
+
if measure.data_type != Boolean:
|
|
1239
|
+
raise SemanticError("1-1-2-3", type=SCALAR_TYPES_CLASS_REVERSE[Boolean])
|
|
1240
|
+
return None
|
|
1241
|
+
        elif node.op == FILL_TIME_SERIES:
            mode = self.visit(node.params[0]) if len(node.params) == 1 else "all"
            return Fill_time_series.analyze(self.visit(node.children[0]), mode)
        elif node.op == DATE_ADD:
            params = [self.visit(node.params[0]), self.visit(node.params[1])]
            return Date_Add.analyze(self.visit(node.children[0]), params)
        elif node.op == CAST:
            operand = self.visit(node.children[0])
            scalar_type = node.children[1]
            mask = None
            if len(node.params) > 0:
                mask = self.visit(node.params[0])
            return Cast.analyze(operand, scalar_type, mask)
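
        # check_datapoint: validates a dataset against a stored datapoint
        # ruleset; each rule is visited with the ruleset state pushed onto
        # the interpreter and popped again afterwards.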
        elif node.op == CHECK_DATAPOINT:
            if self.dprs is None:
                raise SemanticError("1-2-6", node_type="Datapoint Rulesets", node_value="")
            # Checking if ruleset exists
            dpr_name: Any = node.children[1]
            if dpr_name not in self.dprs:
                raise SemanticError("1-2-6", node_type="Datapoint Ruleset", node_value=dpr_name)
            dpr_info = self.dprs[dpr_name]

            # Extracting dataset
            dataset_element = self.visit(node.children[0])
            if not isinstance(dataset_element, Dataset):
                raise SemanticError("1-1-1-20", op=node.op)
            # Checking if list of components supplied is valid
            if len(node.children) > 2:
                for comp_name in node.children[2:]:
                    if comp_name.__str__() not in dataset_element.components:
                        raise SemanticError(
                            "1-1-1-10",
                            comp_name=comp_name,
                            dataset_name=dataset_element.name,
                        )
                if dpr_info is not None and dpr_info["signature_type"] == "variable":
                    for i, comp_name in enumerate(node.children[2:]):
                        if comp_name != dpr_info["params"][i]:
                            raise SemanticError(
                                "1-1-10-3",
                                op=node.op,
                                expected=dpr_info["params"][i],
                                found=comp_name,
                            )

            output: Any = node.params[0]  # invalid, all_measures, all
            if dpr_info is None:
                dpr_info = {}

            rule_output_values = {}
            self.ruleset_dataset = dataset_element
            self.ruleset_signature = dpr_info["signature"]
            self.ruleset_mode = output
            # Gather rule data, adding the ruleset dataset to the interpreter
            if dpr_info is not None:
                for rule in dpr_info["rules"]:
                    rule_output_values[rule.name] = {
                        "errorcode": rule.erCode,
                        "errorlevel": rule.erLevel,
                        "output": self.visit(rule),
                    }
            self.ruleset_mode = None
            self.ruleset_signature = None
            self.ruleset_dataset = None

            # Datapoint Ruleset final evaluation
            return Check_Datapoint.analyze(
                dataset_element=dataset_element,
                rule_info=rule_output_values,
                output=output,
            )
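        # check_hierarchy / hierarchy share the same argument parsing: the
        # call carries (dataset, ruleset), (dataset, component, ruleset) or
        # extra condition components after the ruleset name.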
        elif node.op in (CHECK_HIERARCHY, HIERARCHY):
            component: Optional[str] = None
            if len(node.children) == 2:
                dataset, hr_name = (self.visit(x) for x in node.children)
                cond_components: List[str] = []
            elif len(node.children) == 3:
                dataset, component, hr_name = (self.visit(x) for x in node.children)
                cond_components = []
            else:
                children = [self.visit(x) for x in node.children]
                dataset = children[0]
                component = children[1]
                hr_name = children[2]
                cond_components = children[3:]

            # Input is always dataset
            mode, input_, output = (self.visit(param) for param in node.params)

            # Sanitise the hierarchical ruleset and the call

            if self.hrs is None:
                raise SemanticError("1-2-6", node_type="Hierarchical Rulesets", node_value="")
            else:
                if hr_name not in self.hrs:
                    raise SemanticError(
                        "1-2-6", node_type="Hierarchical Ruleset", node_value=hr_name
                    )

            if not isinstance(dataset, Dataset):
                raise SemanticError("1-1-1-20", op=node.op)

            hr_info = self.hrs[hr_name]
            if hr_info is not None:
                if len(cond_components) != len(hr_info["condition"]):
                    raise SemanticError("1-1-10-2", op=node.op)

                if (
                    hr_info["node"].signature_type == "variable"
                    and hr_info["signature"] != component
                ):
                    raise SemanticError(
                        "1-1-10-3",
                        op=node.op,
                        found=component,
                        expected=hr_info["signature"],
                    )
                elif hr_info["node"].signature_type == "valuedomain" and component is None:
                    raise SemanticError("1-1-10-4", op=node.op)
            elif component is None:
                # TODO: Leaving this until refactor in Ruleset handling is done
                raise NotImplementedError(
                    "Hierarchical Ruleset handling without component "
                    "and signature type variable is not implemented yet."
                )

            cond_info = {}
            for i, cond_comp in enumerate(hr_info["condition"]):
                if (
                    hr_info["node"].signature_type == "variable"
                    and cond_components[i] != cond_comp
                ):
                    raise SemanticError(
                        "1-1-10-6",
                        op=node.op,
                        expected=cond_comp,
                        found=cond_components[i],
                    )
                cond_info[cond_comp] = cond_components[i]

            if node.op == HIERARCHY:
                aux = []
                for rule in hr_info["rules"]:
                    if rule.rule.op == EQ or rule.rule.op == WHEN and rule.rule.right.op == EQ:
                        aux.append(rule)
                # Filter only the rules with HRBinOP as =,
                # as they are the ones that will be computed
                if len(aux) == 0:
                    raise SemanticError("1-1-10-5")
                hr_info["rules"] = aux

                hierarchy_ast = AST.HRuleset(
                    name=hr_name,
                    signature_type=hr_info["node"].signature_type,
                    element=hr_info["node"].element,
                    rules=aux,
                    line_start=node.line_start,
                    line_stop=node.line_stop,
                    column_start=node.column_start,
                    column_stop=node.column_stop,
                )
                HRDAGAnalyzer().visit(hierarchy_ast)

            Check_Hierarchy.validate_hr_dataset(dataset, component)

            # Gather rule data, adding the necessary elements to the interpreter
            # for simplicity
            self.ruleset_dataset = dataset
            self.ruleset_signature = {**{"RULE_COMPONENT": component}, **cond_info}
            self.ruleset_mode = mode
            self.hr_input = input_
            rule_output_values = {}
            if node.op == HIERARCHY:
                self.is_from_hr_agg = True
                self.hr_agg_rules_computed = {}
                for rule in hr_info["rules"]:
                    self.visit(rule)
                self.is_from_hr_agg = False
            else:
                self.is_from_hr_val = True
                for rule in hr_info["rules"]:
                    rule_output_values[rule.name] = {
                        "errorcode": rule.erCode,
                        "errorlevel": rule.erLevel,
                        "output": self.visit(rule),
                    }
                self.is_from_hr_val = False
            self.ruleset_signature = None
            self.ruleset_dataset = None
            self.ruleset_mode = None
            self.hr_input = None

            # Final evaluation
            if node.op == CHECK_HIERARCHY:
                result = Check_Hierarchy.analyze(
                    dataset_element=dataset,
                    rule_info=rule_output_values,
                    output=output,
                )
                del rule_output_values
            else:
                result = Hierarchy.analyze(dataset, self.hr_agg_rules_computed, output)
                self.hr_agg_rules_computed = None
            return result

        raise SemanticError("1-3-5", op_type="ParamOp", node_op=node.op)
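
    # Datapoint rules: each rule runs against a fresh copy of the ruleset
    # dataset; a DataComponent result is folded back into the data as a
    # "bool_var" column, and "invalid" mode keeps only the failing rows.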
    def visit_DPRule(self, node: AST.DPRule) -> None:
        self.is_from_rule = True
        if self.ruleset_dataset is not None:
            if self.ruleset_dataset.data is None:
                self.rule_data = None
            else:
                self.rule_data = self.ruleset_dataset.data.copy()
        validation_data = self.visit(node.rule)
        if isinstance(validation_data, DataComponent):
            if self.rule_data is not None and self.ruleset_dataset is not None:
                aux = self.rule_data.loc[:, self.ruleset_dataset.get_components_names()]
                aux["bool_var"] = validation_data.data
                validation_data = aux
            else:
                validation_data = None
        if self.ruleset_mode == "invalid" and validation_data is not None:
            validation_data = validation_data[validation_data["bool_var"] == False]
        self.rule_data = None
        self.is_from_rule = False
        return validation_data
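
    # Hierarchical rules mirror visit_DPRule, but in roll-up mode (hierarchy)
    # computed rows are cached in hr_agg_rules_computed so that later rules
    # can reference earlier results.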
    def visit_HRule(self, node: AST.HRule) -> None:
        self.is_from_rule = True
        if self.ruleset_dataset is not None:
            self.rule_data = (
                None if self.ruleset_dataset.data is None else self.ruleset_dataset.data.copy()
            )
        rule_result = self.visit(node.rule)
        if rule_result is None:
            self.is_from_rule = False
            return None
        if self.is_from_hr_agg:
            measure_name = rule_result.get_measures_names()[0]
            if (
                self.hr_agg_rules_computed is not None
                and rule_result.data is not None
                and len(rule_result.data[measure_name]) > 0
            ):
                self.hr_agg_rules_computed[rule_result.name] = rule_result.data
        else:
            rule_result = rule_result.data
        self.rule_data = None
        self.is_from_rule = False
        return rule_result
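
    # HRBinOp covers three shapes: a WHEN filter over the rule data, a
    # comparison from HR_COMP_MAPPING (validation/assignment), and a numeric
    # operator from HR_NUM_BINARY_MAPPING used inside rule expressions.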
    def visit_HRBinOp(self, node: AST.HRBinOp) -> Any:
        if node.op == WHEN:
            filter_comp = self.visit(node.left)
            if self.rule_data is None:
                return None
            filtering_indexes = list(filter_comp.data[filter_comp.data == True].index)
            nan_indexes = list(filter_comp.data[filter_comp.data.isnull()].index)
            # If no filtering indexes, then all datapoints are valid on DPR and HR
            if len(filtering_indexes) == 0 and not (self.is_from_hr_agg or self.is_from_hr_val):
                self.rule_data["bool_var"] = True
                self.rule_data.loc[nan_indexes, "bool_var"] = None
                return self.rule_data
            non_filtering_indexes = list(set(filter_comp.data.index) - set(filtering_indexes))

            original_data = self.rule_data.copy()
            self.rule_data = self.rule_data.iloc[filtering_indexes].reset_index(drop=True)
            result_validation = self.visit(node.right)
            if self.is_from_hr_agg or self.is_from_hr_val:
                # We only need to filter rule_data on DPR
                return result_validation
            self.rule_data["bool_var"] = result_validation.data
            original_data = original_data.merge(
                self.rule_data, how="left", on=original_data.columns.tolist()
            )
            original_data.loc[non_filtering_indexes, "bool_var"] = True
            original_data.loc[nan_indexes, "bool_var"] = None
            return original_data
        elif node.op in HR_COMP_MAPPING:
            self.is_from_assignment = True
            if self.ruleset_mode in ("partial_null", "partial_zero"):
                self.hr_partial_is_valid = []
            left_operand = self.visit(node.left)
            self.is_from_assignment = False
            right_operand = self.visit(node.right)
            if isinstance(right_operand, Dataset):
                right_operand = get_measure_from_dataset(right_operand, node.right.value)

            if self.ruleset_mode in ("partial_null", "partial_zero"):
                # Check all values were present in the dataset
                if self.hr_partial_is_valid and not any(self.hr_partial_is_valid):
                    right_operand.data = right_operand.data.map(lambda x: "REMOVE_VALUE")
                self.hr_partial_is_valid = []

            if self.is_from_hr_agg:
                return HAAssignment.analyze(left_operand, right_operand, self.ruleset_mode)
            else:
                result = HR_COMP_MAPPING[node.op].analyze(
                    left_operand, right_operand, self.ruleset_mode
                )
                left_measure = left_operand.get_measures()[0]
                if left_operand.data is None:
                    result.data = None
                else:
                    left_original_measure_data = left_operand.data[left_measure.name]
                    result.data[left_measure.name] = left_original_measure_data
                    result.components[left_measure.name] = left_measure
                return result
        else:
            left_operand = self.visit(node.left)
            right_operand = self.visit(node.right)
            if (
                isinstance(left_operand, Dataset)
                and isinstance(right_operand, Dataset)
                and self.ruleset_mode in ("partial_null", "partial_zero")
                and not self.only_semantic
            ):
                measure_name = left_operand.get_measures_names()[0]
                if left_operand.data is None:
                    left_operand.data = pd.DataFrame({measure_name: []})
                if right_operand.data is None:
                    right_operand.data = pd.DataFrame({measure_name: []})
                left_null_indexes = set(
                    left_operand.data[left_operand.data[measure_name].isnull()].index
                )
                right_null_indexes = set(
                    right_operand.data[right_operand.data[measure_name].isnull()].index
                )
                # If no indexes are in common, then one datapoint is not null
                invalid_indexes = list(left_null_indexes.intersection(right_null_indexes))
                if len(invalid_indexes) > 0:
                    left_operand.data.loc[invalid_indexes, measure_name] = "REMOVE_VALUE"
            if isinstance(left_operand, Dataset):
                left_operand = get_measure_from_dataset(left_operand, node.left.value)
            if isinstance(right_operand, Dataset):
                right_operand = get_measure_from_dataset(right_operand, node.right.value)
            return HR_NUM_BINARY_MAPPING[node.op].analyze(left_operand, right_operand)
    def visit_HRUnOp(self, node: AST.HRUnOp) -> None:
        operand = self.visit(node.operand)
        return HR_UNARY_MAPPING[node.op].analyze(operand)
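
    # check(...): wraps a boolean validation dataset, an optional imbalance
    # dataset and the error code/level into the final result. Illustrative
    # VTL (assumed syntax):
    #   DS_r := check(DS_1 > DS_2 imbalance DS_1 - DS_2 invalid);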
    def visit_Validation(self, node: AST.Validation) -> Dataset:
        validation_element = self.visit(node.validation)
        if not isinstance(validation_element, Dataset):
            raise ValueError(f"Expected dataset, got {type(validation_element).__name__}")

        imbalance_element = None
        if node.imbalance is not None:
            imbalance_element = self.visit(node.imbalance)
            if not isinstance(imbalance_element, Dataset):
                raise ValueError(f"Expected dataset, got {type(imbalance_element).__name__}")

        return Check.analyze(
            validation_element=validation_element,
            imbalance_element=imbalance_element,
            error_code=node.error_code,
            error_level=node.error_level,
            invalid=node.invalid,
        )
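
    # eval(...): runs a registered external routine over dataset operands;
    # operand names are stripped of any "alias." prefix before being handed
    # to the routine.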
    def visit_EvalOp(self, node: AST.EvalOp) -> Dataset:
        """
        EvalOp: (name, children, output, language)

        Basic usage:

            for child in node.children:
                self.visit(child)
            if node.output != None:
                self.visit(node.output)

        """
        if node.language not in EXTERNAL:
            raise Exception(f"Language {node.language} not supported on Eval")

        if self.external_routines is None:
            raise SemanticError("2-3-10", comp_type="External Routines")

        if node.name not in self.external_routines:
            raise SemanticError("1-3-5", op_type="External Routine", node_op=node.name)
        external_routine = self.external_routines[node.name]
        operands = {}
        for operand in node.operands:
            element = self.visit(operand)
            if not isinstance(element, Dataset):
                raise ValueError(f"Expected dataset, got {type(element).__name__} as Eval Operand")
            operands[element.name.split(".")[1] if "." in element.name else element.name] = element
        output_to_check = node.output
        return Eval.analyze(operands, external_routine, output_to_check)
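
    # if-then-else support: for each condition the interpreter records which
    # row indexes fall into the "then" and "else" branches, stacking the
    # resulting datasets so nested conditions compose.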
    def generate_then_else_datasets(self, condition: Union[Dataset, DataComponent]) -> None:
        components = {}
        if self.then_condition_dataset is None:
            self.then_condition_dataset = []
        if self.else_condition_dataset is None:
            self.else_condition_dataset = []
        if isinstance(condition, Dataset):
            if len(condition.get_measures()) != 1:
                raise SemanticError("1-1-1-4", op="condition")
            if condition.get_measures()[0].data_type != BASIC_TYPES[bool]:
                raise SemanticError("2-1-9-5", op="condition", name=condition.name)
            name = condition.get_measures_names()[0]
            if condition.data is None or condition.data.empty:
                data = None
            else:
                data = condition.data[name]
            components = {comp.name: comp for comp in condition.get_identifiers()}

        else:
            if condition.data_type != BASIC_TYPES[bool]:
                raise SemanticError("2-1-9-4", op="condition", name=condition.name)
            name = condition.name
            data = None if condition.data is None else condition.data

        if data is not None:
            if self.nested_condition and self.condition_stack is not None:
                merge_df = (
                    self.then_condition_dataset[-1]
                    if self.condition_stack[-1] == THEN_ELSE["then"]
                    else self.else_condition_dataset[-1]
                )
                indexes = merge_df.data[merge_df.data.columns[-1]]
            else:
                indexes = data[data.notnull()].index

            if isinstance(condition, Dataset):
                filtered_data = data.iloc[indexes]
                then_data: Any = (
                    condition.data[condition.data[name] == True]
                    if (condition.data is not None)
                    else []
                )
                then_indexes: Any = list(filtered_data[filtered_data == True].index)
                if len(then_data) > len(then_indexes):
                    then_data = then_data.iloc[then_indexes]
                then_data[name] = then_indexes
                else_data: Any = (
                    condition.data[condition.data[name] != True]
                    if (condition.data is not None)
                    else []
                )
                else_indexes: Any = list(set(indexes) - set(then_indexes))
                if len(else_data) > len(else_indexes):
                    else_data = else_data.iloc[else_indexes]
                else_data[name] = else_indexes
            else:
                filtered_data = data.iloc[indexes]
                then_indexes = list(filtered_data[filtered_data == True].index)
                else_indexes = list(set(indexes) - set(then_indexes))
                then_data = pd.DataFrame({name: then_indexes})
                else_data = pd.DataFrame({name: else_indexes})
        else:
            then_data = pd.DataFrame({name: []})
            else_data = pd.DataFrame({name: []})
        components.update(
            {
                name: Component(
                    name=name,
                    data_type=BASIC_TYPES[int],
                    role=Role.MEASURE,
                    nullable=True,
                )
            }
        )

        if self.condition_stack and len(self.condition_stack) > 0:
            last_condition_dataset = (
                self.then_condition_dataset[-1]
                if self.condition_stack[-1] == THEN_ELSE["then"]
                else (self.else_condition_dataset[-1])
            )
            measure_name = last_condition_dataset.get_measures_names()[0]
            then_data = then_data[then_data[name].isin(last_condition_dataset.data[measure_name])]
            else_data = else_data[else_data[name].isin(last_condition_dataset.data[measure_name])]
        then_dataset = Dataset(name=name, components=components, data=then_data)
        else_dataset = Dataset(name=name, components=components, data=else_data)

        self.then_condition_dataset.append(then_dataset)
        self.else_condition_dataset.append(else_dataset)
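
    # Counterpart to generate_then_else_datasets: reindexes the two branch
    # results against the recorded row indexes so then/else values line up
    # before the conditional operator combines them.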
    def merge_then_else_datasets(self, left_operand: Any, right_operand: Any) -> Any:
        if (
            self.then_condition_dataset is None
            or self.else_condition_dataset is None
            or self.condition_stack is None
        ):
            return left_operand, right_operand

        if self.is_from_case_then:
            merge_dataset = (
                self.then_condition_dataset[-1]
                if self.condition_stack[-1] == THEN_ELSE["then"]
                else self.else_condition_dataset[-1]
            )
        else:
            merge_dataset = (
                self.then_condition_dataset.pop()
                if self.condition_stack.pop() == THEN_ELSE["then"]
                else (self.else_condition_dataset.pop())
            )

        merge_index = merge_dataset.data[merge_dataset.get_measures_names()[0]].to_list()
        ids = merge_dataset.get_identifiers_names()
        if isinstance(left_operand, (Dataset, DataComponent)):
            if left_operand.data is None:
                return left_operand, right_operand
            if isinstance(left_operand, Dataset):
                dataset_index = left_operand.data.index[
                    left_operand.data[ids]
                    .apply(tuple, 1)
                    .isin(merge_dataset.data[ids].apply(tuple, 1))
                ]
                left = left_operand.data[left_operand.get_measures_names()[0]]
                left_operand.data[left_operand.get_measures_names()[0]] = left.reindex(
                    dataset_index, fill_value=None
                )
            else:
                left = left_operand.data
                left_operand.data = left.reindex(merge_index, fill_value=None)
        if isinstance(right_operand, (Dataset, DataComponent)):
            if right_operand.data is None:
                return left_operand, right_operand
            if isinstance(right_operand, Dataset):
                dataset_index = right_operand.data.index[
                    right_operand.data[ids]
                    .apply(tuple, 1)
                    .isin(merge_dataset.data[ids].apply(tuple, 1))
                ]
                right = right_operand.data[right_operand.get_measures_names()[0]]
                right_operand.data[right_operand.get_measures_names()[0]] = right.reindex(
                    dataset_index, fill_value=None
                )
            else:
                right = right_operand.data
                right_operand.data = right.reindex(merge_index, fill_value=None)
        return left_operand, right_operand
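
    # Identifiers resolve, in order, against the active UDO parameter stack,
    # the known datasets (returning only the name when on the left-hand side
    # of an assignment) and finally fall back to the raw value.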
    def visit_Identifier(self, node: AST.Identifier) -> Union[AST.AST, Dataset, str]:
        """
        Identifier: (value)

        Basic usage:

            return node.value
        """

        if self.udo_params is not None and node.value in self.udo_params[-1]:
            return self.udo_params[-1][node.value]

        if node.value in self.datasets:
            if self.is_from_assignment:
                return copy(self.datasets[node.value].name)
            return copy(self.datasets[node.value])
        return node.value
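
    # DefIdentifier is where a code item of a hierarchical ruleset becomes
    # data: matching rows are pulled from the rule data, and missing code
    # items are synthesised as zero/null rows according to the hierarchy mode.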
    def visit_DefIdentifier(self, node: AST.DefIdentifier) -> Any:
        """
        DefIdentifier: (value, kind)

        Basic usage:

            return node.value
        """
        partial_is_valid = True
        # Only for Hierarchical Rulesets
        if not (self.is_from_rule and node.kind == "CodeItemID"):
            return node.value

        # Getting Dataset elements
        result_components = {
            comp_name: copy(comp)
            for comp_name, comp in self.ruleset_dataset.components.items()  # type: ignore[union-attr]
        }
        if self.ruleset_signature is not None:
            hr_component = self.ruleset_signature["RULE_COMPONENT"]
        name = node.value

        if self.rule_data is None:
            return Dataset(name=name, components=result_components, data=None)

        condition = None
        if hasattr(node, "_right_condition"):
            condition: DataComponent = self.visit(node._right_condition)  # type: ignore[no-redef]
            if condition is not None:
                condition = condition.data[condition.data == True].index

        if (
            self.hr_agg_rules_computed is not None
            and self.hr_input == "rule"
            and node.value in self.hr_agg_rules_computed
        ):
            df = self.hr_agg_rules_computed[node.value].copy()
            return Dataset(name=name, components=result_components, data=df)

        df = self.rule_data.copy()
        if condition is not None:
            df = df.loc[condition].reset_index(drop=True)

        measure_name = self.ruleset_dataset.get_measures_names()[0]  # type: ignore[union-attr]
        if node.value in df[hr_component].values:
            rest_identifiers = [
                comp.name
                for comp in result_components.values()
                if comp.role == Role.IDENTIFIER and comp.name != hr_component
            ]
            code_data = df[df[hr_component] == node.value].reset_index(drop=True)
            code_data = code_data.merge(df[rest_identifiers], how="right", on=rest_identifiers)
            code_data = code_data.drop_duplicates().reset_index(drop=True)

            # If the value is in the dataset, we create a new row
            # based on the hierarchy mode
            # (Missing data points are considered,
            # lines 6483-6510 of the reference manual)
            if self.ruleset_mode in ("partial_null", "partial_zero"):
                # We do not care about the presence of the leftCodeItem in Hierarchy Roll-up
                if self.is_from_hr_agg and self.is_from_assignment:
                    pass
                elif code_data[hr_component].isnull().any():
                    partial_is_valid = False

            if self.ruleset_mode in ("non_zero", "partial_zero", "always_zero"):
                fill_indexes = code_data[code_data[hr_component].isnull()].index
                code_data.loc[fill_indexes, measure_name] = 0
            code_data[hr_component] = node.value
            df = code_data
        else:
            # If the value is not in the dataset, we create a new row
            # based on the hierarchy mode
            # (Missing data points are considered,
            # lines 6483-6510 of the reference manual)
            if self.ruleset_mode in ("partial_null", "partial_zero"):
                # We do not care about the presence of the leftCodeItem in Hierarchy Roll-up
                if self.is_from_hr_agg and self.is_from_assignment:
                    pass
                elif self.ruleset_mode == "partial_null":
                    partial_is_valid = False
            df = df.head(1)
            df[hr_component] = node.value
            if self.ruleset_mode in ("non_zero", "partial_zero", "always_zero"):
                df[measure_name] = 0
            else:  # For non_null, partial_null and always_null
                df[measure_name] = None
        if self.hr_partial_is_valid is not None and self.ruleset_mode in (
            "partial_null",
            "partial_zero",
        ):
            self.hr_partial_is_valid.append(partial_is_valid)
        return Dataset(name=name, components=result_components, data=df)
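
    # User Defined Operators: actual arguments are bound to the operator
    # signature (applying defaults and implicit type promotion), pushed onto
    # a stack so nested UDO calls resolve their own parameters, and the
    # stored expression AST is evaluated on a deepcopy. Illustrative VTL
    # (assumed syntax):
    #   define operator suma (x dataset, y dataset) returns dataset
    #       is x + y end operator;
    #   DS_r := suma(DS_1, DS_2);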
    def visit_UDOCall(self, node: AST.UDOCall) -> None:  # noqa: C901
        if self.udos is None:
            raise SemanticError("2-3-10", comp_type="User Defined Operators")
        elif node.op not in self.udos:
            raise SemanticError("1-2-3", node_op=node.op, op_type="User Defined Operator")
        if self.signature_values is None:
            self.signature_values = {}

        operator = self.udos[node.op]
        signature_values = {}

        if operator is None:
            raise SemanticError("1-2-3", node_op=node.op, op_type="User Defined Operator")
        if operator["output"] == "Component" and not (
            self.is_from_regular_aggregation or self.is_from_rule
        ):
            raise SemanticError("1-2-12", op=node.op)

        for i, param in enumerate(operator["params"]):
            if i >= len(node.params):
                if "default" in param:
                    value = self.visit(param["default"]).value
                    signature_values[param["name"]] = Scalar(
                        name=str(value), value=value, data_type=BASIC_TYPES[type(value)]
                    )
                else:
                    raise SemanticError(
                        "1-2-11",
                        op=node.op,
                        received=len(node.params),
                        expected=len(operator["params"]),
                    )
            else:
                if isinstance(param["type"], str):  # Scalar, Dataset, Component
                    if param["type"] == "Scalar":
                        signature_values[param["name"]] = self.visit(node.params[i])
                    elif param["type"] in ["Dataset", "Component"]:
                        if isinstance(node.params[i], AST.VarID):
                            signature_values[param["name"]] = node.params[i].value  # type: ignore[attr-defined]
                        else:
                            param_element = self.visit(node.params[i])
                            if isinstance(param_element, Dataset):
                                if param["type"] == "Component":
                                    raise SemanticError(
                                        "1-3-1-1",
                                        op=node.op,
                                        option=param["name"],
                                        type_1=param["type"],
                                        type_2="Dataset",
                                    )
                            elif isinstance(param_element, Scalar) and param["type"] in [
                                "Dataset",
                                "Component",
                            ]:
                                raise SemanticError(
                                    "1-3-1-1",
                                    op=node.op,
                                    option=param["name"],
                                    type_1=param["type"],
                                    type_2="Scalar",
                                )
                            signature_values[param["name"]] = param_element

                    else:
                        raise NotImplementedError
                elif issubclass(param["type"], ScalarType):  # Basic types
                    # For basic Scalar types (Integer, Float, String, Boolean)
                    # We validate the type is correct and cast the value
                    param_element = self.visit(node.params[i])
                    if isinstance(param_element, (Dataset, DataComponent)):
                        type_2 = "Dataset" if isinstance(param_element, Dataset) else "Component"
                        raise SemanticError(
                            "1-3-1-1",
                            op=node.op,
                            option=param["name"],
                            type_1=param["type"],
                            type_2=type_2,
                        )
                    scalar_type = param["type"]
                    if not check_unary_implicit_promotion(param_element.data_type, scalar_type):
                        raise SemanticError(
                            "2-3-5",
                            param_type=scalar_type,
                            type_name=param_element.data_type,
                            op=node.op,
                            param_name=param["name"],
                        )
                    signature_values[param["name"]] = Scalar(
                        name=param_element.name,
                        value=scalar_type.cast(param_element.value),
                        data_type=scalar_type,
                    )
                else:
                    raise NotImplementedError

        # We set it here to a list to start the stack of UDO params
        if self.udo_params is None:
            self.udo_params = []

        # Adding parameters to the stack
        for k, v in signature_values.items():
            if hasattr(v, "name"):
                v = v.name  # type: ignore[assignment]
            if v in self.signature_values:
                signature_values[k] = self.signature_values[v]  # type: ignore[index]
        self.signature_values.update(signature_values)
        self.udo_params.append(signature_values)

        # Calling the UDO AST, we use deepcopy to avoid changing the original UDO AST
        if operator is not None:
            result = self.visit(deepcopy(operator["expression"]))

        if self.is_from_regular_aggregation or self.is_from_rule:
            result_type = "Component" if isinstance(result, DataComponent) else "Scalar"
        else:
            result_type = "Scalar" if isinstance(result, Scalar) else "Dataset"

        if result_type != operator["output"]:
            raise SemanticError(
                "1-3-1-1",
                op=node.op,
                option="output",
                type_1=operator["output"],
                type_2=result_type,
            )

        # We pop the last element of the stack (current UDO params)
        # to avoid using them in the next UDO call
        self.udo_params.pop()

        # We set to None if empty to ensure we do not use these params anymore
        if len(self.udo_params) == 0:
            self.udo_params = None
        return result
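
    # time_agg: when no operand is given, the time identifier of the dataset
    # under aggregation is looked up and visited as a synthetic VarID.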
    def visit_TimeAggregation(self, node: AST.TimeAggregation) -> None:
        if node.operand is not None:
            operand = self.visit(node.operand)
        else:
            if self.aggregation_dataset is None:
                raise SemanticError("1-1-19-11")
            component_name = Time_Aggregation._get_time_id(self.aggregation_dataset)
            ast_operand = VarID(
                value=component_name,
                line_start=node.line_start,
                line_stop=node.line_stop,
                column_start=node.column_start,
                column_stop=node.column_stop,
            )
            operand = self.visit(ast_operand)
        return Time_Aggregation.analyze(
            operand=operand,
            period_from=node.period_from,
            period_to=node.period_to,
            conf=node.conf,
        )