vtlengine 1.4.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vtlengine/API/_InternalApi.py +791 -0
- vtlengine/API/__init__.py +612 -0
- vtlengine/API/data/schema/external_routines_schema.json +34 -0
- vtlengine/API/data/schema/json_schema_2.1.json +116 -0
- vtlengine/API/data/schema/value_domain_schema.json +97 -0
- vtlengine/AST/ASTComment.py +57 -0
- vtlengine/AST/ASTConstructor.py +598 -0
- vtlengine/AST/ASTConstructorModules/Expr.py +1928 -0
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +995 -0
- vtlengine/AST/ASTConstructorModules/Terminals.py +790 -0
- vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- vtlengine/AST/ASTDataExchange.py +10 -0
- vtlengine/AST/ASTEncoders.py +32 -0
- vtlengine/AST/ASTString.py +675 -0
- vtlengine/AST/ASTTemplate.py +558 -0
- vtlengine/AST/ASTVisitor.py +25 -0
- vtlengine/AST/DAG/__init__.py +479 -0
- vtlengine/AST/DAG/_words.py +10 -0
- vtlengine/AST/Grammar/Vtl.g4 +705 -0
- vtlengine/AST/Grammar/VtlTokens.g4 +409 -0
- vtlengine/AST/Grammar/__init__.py +0 -0
- vtlengine/AST/Grammar/lexer.py +2139 -0
- vtlengine/AST/Grammar/parser.py +16597 -0
- vtlengine/AST/Grammar/tokens.py +169 -0
- vtlengine/AST/VtlVisitor.py +824 -0
- vtlengine/AST/__init__.py +674 -0
- vtlengine/DataTypes/TimeHandling.py +562 -0
- vtlengine/DataTypes/__init__.py +863 -0
- vtlengine/DataTypes/_time_checking.py +135 -0
- vtlengine/Exceptions/__exception_file_generator.py +96 -0
- vtlengine/Exceptions/__init__.py +159 -0
- vtlengine/Exceptions/messages.py +1004 -0
- vtlengine/Interpreter/__init__.py +2048 -0
- vtlengine/Model/__init__.py +501 -0
- vtlengine/Operators/Aggregation.py +357 -0
- vtlengine/Operators/Analytic.py +455 -0
- vtlengine/Operators/Assignment.py +23 -0
- vtlengine/Operators/Boolean.py +106 -0
- vtlengine/Operators/CastOperator.py +451 -0
- vtlengine/Operators/Clause.py +366 -0
- vtlengine/Operators/Comparison.py +488 -0
- vtlengine/Operators/Conditional.py +495 -0
- vtlengine/Operators/General.py +191 -0
- vtlengine/Operators/HROperators.py +254 -0
- vtlengine/Operators/Join.py +447 -0
- vtlengine/Operators/Numeric.py +422 -0
- vtlengine/Operators/RoleSetter.py +77 -0
- vtlengine/Operators/Set.py +176 -0
- vtlengine/Operators/String.py +578 -0
- vtlengine/Operators/Time.py +1144 -0
- vtlengine/Operators/Validation.py +275 -0
- vtlengine/Operators/__init__.py +900 -0
- vtlengine/Utils/__Virtual_Assets.py +34 -0
- vtlengine/Utils/__init__.py +479 -0
- vtlengine/__extras_check.py +17 -0
- vtlengine/__init__.py +27 -0
- vtlengine/files/__init__.py +0 -0
- vtlengine/files/output/__init__.py +35 -0
- vtlengine/files/output/_time_period_representation.py +55 -0
- vtlengine/files/parser/__init__.py +240 -0
- vtlengine/files/parser/_rfc_dialect.py +22 -0
- vtlengine/py.typed +0 -0
- vtlengine-1.4.0rc2.dist-info/METADATA +89 -0
- vtlengine-1.4.0rc2.dist-info/RECORD +66 -0
- vtlengine-1.4.0rc2.dist-info/WHEEL +4 -0
- vtlengine-1.4.0rc2.dist-info/licenses/LICENSE.md +661 -0
|
@@ -0,0 +1,900 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from copy import copy
|
|
3
|
+
from typing import Any, Optional, Union
|
|
4
|
+
|
|
5
|
+
# if os.environ.get("SPARK", False):
|
|
6
|
+
# import pyspark.pandas as pd
|
|
7
|
+
# else:
|
|
8
|
+
# import pandas as pd
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
from vtlengine.AST.Grammar.tokens import (
|
|
12
|
+
AND,
|
|
13
|
+
CEIL,
|
|
14
|
+
EQ,
|
|
15
|
+
FLOOR,
|
|
16
|
+
GT,
|
|
17
|
+
GTE,
|
|
18
|
+
LT,
|
|
19
|
+
LTE,
|
|
20
|
+
NEQ,
|
|
21
|
+
OR,
|
|
22
|
+
ROUND,
|
|
23
|
+
XOR,
|
|
24
|
+
)
|
|
25
|
+
from vtlengine.DataTypes import (
|
|
26
|
+
COMP_NAME_MAPPING,
|
|
27
|
+
SCALAR_TYPES_CLASS_REVERSE,
|
|
28
|
+
binary_implicit_promotion,
|
|
29
|
+
check_binary_implicit_promotion,
|
|
30
|
+
check_unary_implicit_promotion,
|
|
31
|
+
unary_implicit_promotion,
|
|
32
|
+
)
|
|
33
|
+
from vtlengine.DataTypes.TimeHandling import (
|
|
34
|
+
PERIOD_IND_MAPPING,
|
|
35
|
+
TimeIntervalHandler,
|
|
36
|
+
TimePeriodHandler,
|
|
37
|
+
)
|
|
38
|
+
from vtlengine.Exceptions import SemanticError
|
|
39
|
+
from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
|
|
40
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
41
|
+
|
|
42
|
+
# Union of the model types an operator may receive or return.
ALL_MODEL_DATA_TYPES = Union[Dataset, Scalar, DataComponent]

# This allows changing the data type of the Measure in the result Data Set
# when the operator is applied to mono-measure Data Sets.
# TODO: Check if there are more operators that allow this
MONOMEASURE_CHANGED_ALLOWED = [CEIL, FLOOR, ROUND]
# Operators whose time-typed operands must be cast to handler objects before
# comparing (see Operator.cast_time_types / cast_time_types_scalar).
BINARY_COMPARISON_OPERATORS = [EQ, NEQ, GT, GTE, LT, LTE]
BINARY_BOOLEAN_OPERATORS = [AND, OR, XOR]

# Module-level switch read by Operator.analyze: when True only the semantic
# validation runs, no data is evaluated. Presumably toggled by the API layer
# — confirm where it is written.
only_semantic = False
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class Operator:
    """
    Superclass for all operators.

    Concrete operators override the class attributes below and implement the
    ``validate``/``evaluate`` pair; ``analyze`` dispatches between them based
    on the module-level ``only_semantic`` flag.
    """

    # VTL operator token (used in dispatch tables and error messages).
    op: Any = None
    # Python callable implementing the operator on plain values.
    py_op: Any = None
    # Optional Spark-specific implementation; falls back to ``py_op``.
    spark_op: Any = None
    # Data type operands must be implicitly promotable to.
    type_to_check: Any = None
    # Fixed result data type, when the operator imposes one.
    return_type: Any = None

    @classmethod
    def analyze(cls, *args: Any, **kwargs: Any) -> Any:
        """Run only the semantic check when ``only_semantic`` is set,
        otherwise run the full evaluation."""
        if only_semantic:
            return cls.validate(*args, **kwargs)
        return cls.evaluate(*args, **kwargs)

    @classmethod
    def cast_time_types(cls, data_type: Any, series: Any) -> Any:
        """
        For binary comparison operators, map raw time values in ``series`` to
        their handler objects so they compare correctly; any other operator
        gets the series back untouched. Nulls pass through unchanged
        (``na_action="ignore"``).
        """
        if cls.op not in BINARY_COMPARISON_OPERATORS:
            return series
        if data_type.__name__ == "TimeInterval":
            series = series.map(
                lambda x: TimeIntervalHandler.from_iso_format(x), na_action="ignore"
            )
        elif data_type.__name__ == "TimePeriod":
            series = series.map(lambda x: TimePeriodHandler(x), na_action="ignore")
        elif data_type.__name__ == "Duration":
            series = series.map(lambda x: PERIOD_IND_MAPPING[x], na_action="ignore")
        return series

    @classmethod
    def cast_time_types_scalar(cls, data_type: Any, value: str) -> Any:
        """Scalar counterpart of :meth:`cast_time_types`.

        ``None`` passes through; an unknown Duration raises.
        """
        if cls.op not in BINARY_COMPARISON_OPERATORS:
            return value
        if value is None:
            return None
        if data_type.__name__ == "TimeInterval":
            return TimeIntervalHandler.from_iso_format(value)
        elif data_type.__name__ == "TimePeriod":
            return TimePeriodHandler(value)
        elif data_type.__name__ == "Duration":
            if value not in PERIOD_IND_MAPPING:
                raise Exception(f"Duration {value} is not valid")
            return PERIOD_IND_MAPPING[value]
        return value

    @classmethod
    def modify_measure_column(cls, result: Dataset) -> None:
        """
        If an Operator changes the data type of the Variable it is applied to
        (e.g., from string to number), the result Data Set cannot keep that
        Variable, because a Variable cannot have different data types in
        different Data Sets. The converted variable must be replaced, in the
        result Data Set, by a default generic Measure of the proper data type.
        For simplicity, operators changing the data type are allowed only on
        mono-measure operand Data Sets, so the conversion happens on just one
        Measure.

        Used by the evaluate functions when a dataset is involved: any data
        column not declared in ``result.components`` is moved into the single
        declared measure column.
        """
        # Guard on ``result``/``result.data`` up front: the original version
        # tested ``result is not None`` only after already calling
        # ``result.get_measures()``, making the guard unreachable.
        if result is None or cls.return_type is None or result.data is None:
            return
        if len(result.get_measures()) != 1:
            return
        measure_name = result.get_measures_names()[0]
        # Hoisted: the original rebuilt set(components) on every iteration.
        declared = set(result.components.keys())
        for column in list(result.data.columns):
            if column not in declared:
                result.data[measure_name] = result.data[column]
                del result.data[column]

    @classmethod
    def validate_dataset_type(cls, *args: Any) -> None:
        """Abstract hook: dataset-level type validation."""
        raise Exception("Method should be implemented by inheritors")

    @classmethod
    def validate_component_type(cls, *args: Any) -> None:
        """Abstract hook: component-level type validation."""
        raise Exception("Method should be implemented by inheritors")

    @classmethod
    def validate_scalar_type(cls, *args: Any) -> None:
        """Abstract hook: scalar-level type validation."""
        raise Exception("Method should be implemented by inheritors")

    @classmethod
    def validate(cls, *args: Any, **kwargs: Any) -> Any:
        """Abstract hook: semantic validation entry point."""
        raise Exception("Method should be implemented by inheritors")

    @classmethod
    def evaluate(cls, *args: Any, **kwargs: Any) -> Any:
        """Abstract hook: evaluation entry point."""
        raise Exception("Method should be implemented by inheritors")

    @classmethod
    def scalar_validation(cls, *args: Any) -> Any:
        """Abstract hook: validation of scalar operands."""
        raise Exception("Method should be implemented by inheritors")

    @classmethod
    def component_validation(cls, *args: Any) -> Any:
        """Abstract hook: validation of component operands."""
        raise Exception("Method should be implemented by inheritors")

    @classmethod
    def validate_type_compatibility(cls, *args: Any) -> bool:
        """Check (without raising) the unary/binary implicit type promotion
        for one or two operand types."""
        if len(args) == 1:
            operand = args[0]
            return check_unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
        if len(args) == 2:
            left, right = args
            return check_binary_implicit_promotion(left, right, cls.type_to_check, cls.return_type)
        raise Exception("Method should be implemented by inheritors")

    @classmethod
    def type_validation(cls, *args: Any) -> Any:
        """Apply the unary/binary implicit type promotion for one or two
        operand types, returning the promoted result type (may raise)."""
        if len(args) == 1:
            operand = args[0]
            return unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
        if len(args) == 2:
            left, right = args
            return binary_implicit_promotion(left, right, cls.type_to_check, cls.return_type)
        raise Exception("Method should be implemented by inheritors")

    @classmethod
    def apply_return_type_dataset(cls, *args: Any) -> None:
        """Abstract hook: apply the operator's return type to a result dataset."""
        raise Exception("Method should be implemented by inheritors")

    @classmethod
    def apply_return_type(cls, *args: Any) -> None:
        """Abstract hook: apply the operator's return type to a result operand."""
        raise Exception("Method should be implemented by inheritors")
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _id_type_promotion_join_keys(
    c_left: Component,
    c_right: Component,
    join_key: str,
    left_data: Optional[pd.DataFrame] = None,
    right_data: Optional[pd.DataFrame] = None,
) -> None:
    """
    Align the values of a shared identifier column in two dataframes so a
    merge on ``join_key`` matches across differing declared types.

    Mutates ``left_data`` and ``right_data`` IN PLACE (returns None): numeric
    identifiers are normalised to int, string identifiers facing a numeric
    one are parsed via ``_handle_str_number``, and the column dtype is forced
    to ``object`` in every path so pandas compares values, not dtypes.
    """
    if left_data is None:
        left_data = pd.DataFrame()
    if right_data is None:
        right_data = pd.DataFrame()

    left_type_name: str = str(c_left.data_type.__name__)
    right_type_name: str = str(c_right.data_type.__name__)

    # Same declared type, or one side is empty: only normalise the dtype.
    # NOTE(review): an empty default DataFrame has no columns, so
    # ``left_data[join_key]`` would raise KeyError here — presumably callers
    # always pass frames that contain ``join_key``; confirm.
    if left_type_name == right_type_name or len(left_data) == 0 or len(right_data) == 0:
        left_data[join_key] = left_data[join_key].astype(object)
        right_data[join_key] = right_data[join_key].astype(object)
        return
    if (left_type_name == "Integer" and right_type_name == "Number") or (
        left_type_name == "Number" and right_type_name == "Integer"
    ):
        # Integer vs Number: truncate both sides to int so 2 matches 2.0.
        left_data[join_key] = left_data[join_key].map(lambda x: int(float(x)))
        right_data[join_key] = right_data[join_key].map(lambda x: int(float(x)))
    elif left_type_name == "String" and right_type_name in ("Integer", "Number"):
        left_data[join_key] = left_data[join_key].map(lambda x: _handle_str_number(x))
    elif left_type_name in ("Integer", "Number") and right_type_name == "String":
        right_data[join_key] = right_data[join_key].map(lambda x: _handle_str_number(x))
    left_data[join_key] = left_data[join_key].astype(object)
    right_data[join_key] = right_data[join_key].astype(object)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _handle_str_number(x: Union[str, int, float]) -> Union[str, int, float]:
|
|
215
|
+
if isinstance(x, int):
|
|
216
|
+
return x
|
|
217
|
+
try:
|
|
218
|
+
x = float(x)
|
|
219
|
+
if x.is_integer():
|
|
220
|
+
return int(x)
|
|
221
|
+
return x
|
|
222
|
+
except ValueError: # Unable to get to string, return the same value that will not be matched
|
|
223
|
+
return x
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
class Binary(Operator):
|
|
227
|
+
@classmethod
|
|
228
|
+
def op_func(cls, *args: Any) -> Any:
|
|
229
|
+
x, y = args
|
|
230
|
+
|
|
231
|
+
if pd.isnull(x) or pd.isnull(y):
|
|
232
|
+
return None
|
|
233
|
+
return cls.py_op(x, y)
|
|
234
|
+
|
|
235
|
+
    @classmethod
    def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
        """
        Element-wise application of the operator to two aligned series.

        Spark path: operates on whole series via ``cls.spark_op`` and then
        nulls out positions where either input was null. Pandas path: maps
        ``op_func`` pairwise and rebuilds a fresh 0..n-1 index.
        """
        # NOTE(review): os.getenv returns a *string* when SPARK is set, so any
        # non-empty value (even "0"/"false") enables this branch — confirm.
        if os.getenv("SPARK", False):
            if cls.spark_op is None:
                # Side effect: permanently caches py_op as the spark op on the class.
                cls.spark_op = cls.py_op

            nulls = left_series.isnull() | right_series.isnull()
            result = cls.spark_op(left_series, right_series)
            result.loc[nulls] = None
            return result
        result = list(map(cls.op_func, left_series.values, right_series.values))
        # Fresh positional index: the inputs' indexes are deliberately discarded.
        return pd.Series(result, index=list(range(len(result))), dtype=object)
|
|
247
|
+
|
|
248
|
+
    @classmethod
    def apply_operation_series_scalar(
        cls,
        series: Any,
        scalar: Any,  # raw scalar *value* (not a Scalar model object) — annotation fixed
        series_left: bool,
    ) -> Any:
        """
        Apply the operator between a series and a plain scalar value.

        A None scalar short-circuits to an all-null series; otherwise nulls in
        the series are preserved (``na_action="ignore"``). ``series_left``
        selects operand order (series op scalar vs scalar op series).
        """
        if scalar is None:
            return pd.Series(None, index=series.index)
        if series_left:
            return series.map(lambda x: cls.py_op(x, scalar), na_action="ignore")
        else:
            return series.map(lambda x: cls.py_op(scalar, x), na_action="ignore")
|
|
261
|
+
|
|
262
|
+
    @classmethod
    def validate(cls, *args: Any) -> Any:
        """
        The main function for validate, applies the implicit promotion (or check it), and
        can do a semantic check too.
        Returns an operand.
        """
        left_operand, right_operand = args

        # Dispatch on the (left, right) model types; Scalar-first combinations
        # are swapped so the Dataset/DataComponent is always the first argument
        # of the target validation method.
        if isinstance(left_operand, Dataset) and isinstance(right_operand, Dataset):
            return cls.dataset_validation(left_operand, right_operand)
        if isinstance(left_operand, Dataset) and isinstance(right_operand, Scalar):
            return cls.dataset_scalar_validation(left_operand, right_operand)
        if isinstance(left_operand, Scalar) and isinstance(right_operand, Dataset):
            return cls.dataset_scalar_validation(right_operand, left_operand)
        if isinstance(left_operand, Scalar) and isinstance(right_operand, Scalar):
            return cls.scalar_validation(left_operand, right_operand)
        if isinstance(left_operand, DataComponent) and isinstance(right_operand, DataComponent):
            return cls.component_validation(left_operand, right_operand)
        if isinstance(left_operand, DataComponent) and isinstance(right_operand, Scalar):
            return cls.component_scalar_validation(left_operand, right_operand)
        if isinstance(left_operand, Scalar) and isinstance(right_operand, DataComponent):
            return cls.component_scalar_validation(right_operand, left_operand)
        # In operator
        if isinstance(left_operand, Dataset) and isinstance(right_operand, ScalarSet):
            return cls.dataset_set_validation(left_operand, right_operand)
        if isinstance(left_operand, Scalar) and isinstance(right_operand, ScalarSet):
            return cls.scalar_set_validation(left_operand, right_operand)
        if isinstance(left_operand, DataComponent) and isinstance(right_operand, ScalarSet):
            return cls.component_set_validation(left_operand, right_operand)
        # NOTE(review): an unmatched combination falls through and implicitly
        # returns None — confirm callers never reach this case.
|
|
292
|
+
|
|
293
|
+
    @classmethod
    def dataset_validation(cls, left_operand: Dataset, right_operand: Dataset) -> Dataset:
        """
        Validate a Dataset-Dataset operation: both operands must share the
        same (sorted) measure names, have at least one measure, pairwise
        type-compatible measures, and at least one common identifier.
        Returns the result Dataset skeleton (components only, no data).
        """
        dataset_name = VirtualCounter._new_ds_name()
        left_identifiers = left_operand.get_identifiers_names()
        right_identifiers = right_operand.get_identifiers_names()

        # The operand with FEWER identifiers donates the result components.
        use_right_components = len(left_identifiers) < len(right_identifiers)

        left_measures = sorted(left_operand.get_measures(), key=lambda x: x.name)
        right_measures = sorted(right_operand.get_measures(), key=lambda x: x.name)
        left_measures_names = [measure.name for measure in left_measures]
        right_measures_names = [measure.name for measure in right_measures]

        if left_measures_names != right_measures_names:
            raise SemanticError(
                "1-1-14-1",
                op=cls.op,
                left=left_measures_names,
                right=right_measures_names,
            )
        elif len(left_measures) == 0:
            raise SemanticError("1-1-1-8", op=cls.op, name=left_operand.name)
        # Pairwise implicit-promotion check (raises on incompatibility).
        for left_measure, right_measure in zip(left_measures, right_measures):
            cls.type_validation(left_measure.data_type, right_measure.data_type)

        # We do not need anymore these variables
        del left_measures
        del right_measures
        del left_measures_names
        del right_measures_names

        join_keys = list(set(left_identifiers).intersection(right_identifiers))
        if len(join_keys) == 0:
            raise SemanticError("1-2-10", op=cls.op)

        # Deleting extra identifiers that we do not need anymore

        base_operand = right_operand if use_right_components else left_operand
        # Attributes are dropped from the result; identifiers and measures kept.
        result_components = {
            component_name: copy(component)
            for component_name, component in base_operand.components.items()
            if component.role in [Role.IDENTIFIER, Role.MEASURE]
        }

        # A result measure is nullable if it is nullable on EITHER operand.
        for comp in [x for x in result_components.values() if x.role == Role.MEASURE]:
            if comp.name in left_operand.components and comp.name in right_operand.components:
                left_comp = left_operand.components[comp.name]
                right_comp = right_operand.components[comp.name]
                comp.nullable = left_comp.nullable or right_comp.nullable

        result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
        cls.apply_return_type_dataset(result_dataset, left_operand, right_operand)
        return result_dataset
|
|
346
|
+
|
|
347
|
+
@classmethod
|
|
348
|
+
def dataset_scalar_validation(cls, dataset: Dataset, scalar: Scalar) -> Dataset:
|
|
349
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
350
|
+
if len(dataset.get_measures()) == 0:
|
|
351
|
+
raise SemanticError("1-1-1-8", op=cls.op, name=dataset.name)
|
|
352
|
+
|
|
353
|
+
result_components = {
|
|
354
|
+
comp_name: copy(comp)
|
|
355
|
+
for comp_name, comp in dataset.components.items()
|
|
356
|
+
if comp.role in [Role.IDENTIFIER, Role.MEASURE]
|
|
357
|
+
}
|
|
358
|
+
result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
|
|
359
|
+
cls.apply_return_type_dataset(result_dataset, dataset, scalar)
|
|
360
|
+
return result_dataset
|
|
361
|
+
|
|
362
|
+
@classmethod
|
|
363
|
+
def scalar_validation(cls, left_operand: Scalar, right_operand: Scalar) -> Scalar:
|
|
364
|
+
if not cls.validate_type_compatibility(left_operand.data_type, right_operand.data_type):
|
|
365
|
+
raise SemanticError(
|
|
366
|
+
"1-1-1-2",
|
|
367
|
+
type_1=left_operand.data_type,
|
|
368
|
+
type_2=right_operand.data_type,
|
|
369
|
+
type_check=cls.type_to_check,
|
|
370
|
+
)
|
|
371
|
+
return Scalar(
|
|
372
|
+
name="result",
|
|
373
|
+
data_type=cls.type_validation(left_operand.data_type, right_operand.data_type),
|
|
374
|
+
value=None,
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
@classmethod
|
|
378
|
+
def component_validation(
|
|
379
|
+
cls, left_operand: DataComponent, right_operand: DataComponent
|
|
380
|
+
) -> DataComponent:
|
|
381
|
+
"""
|
|
382
|
+
Validates the compatibility between the types of the components and the operator
|
|
383
|
+
:param left_operand: The left component
|
|
384
|
+
:param right_operand: The right component
|
|
385
|
+
:return: The result data type of the validation
|
|
386
|
+
"""
|
|
387
|
+
comp_name = VirtualCounter._new_dc_name()
|
|
388
|
+
result_data_type = cls.type_validation(left_operand.data_type, right_operand.data_type)
|
|
389
|
+
result = DataComponent(
|
|
390
|
+
name=comp_name,
|
|
391
|
+
data_type=result_data_type,
|
|
392
|
+
data=None,
|
|
393
|
+
role=left_operand.role,
|
|
394
|
+
nullable=(left_operand.nullable or right_operand.nullable),
|
|
395
|
+
)
|
|
396
|
+
|
|
397
|
+
return result
|
|
398
|
+
|
|
399
|
+
@classmethod
|
|
400
|
+
def component_scalar_validation(cls, component: DataComponent, scalar: Scalar) -> DataComponent:
|
|
401
|
+
cls.type_validation(component.data_type, scalar.data_type)
|
|
402
|
+
result = DataComponent(
|
|
403
|
+
name=component.name,
|
|
404
|
+
data_type=cls.type_validation(component.data_type, scalar.data_type),
|
|
405
|
+
data=None,
|
|
406
|
+
role=component.role,
|
|
407
|
+
nullable=component.nullable or scalar is None,
|
|
408
|
+
)
|
|
409
|
+
return result
|
|
410
|
+
|
|
411
|
+
@classmethod
|
|
412
|
+
def dataset_set_validation(cls, dataset: Dataset, scalar_set: ScalarSet) -> Dataset:
|
|
413
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
414
|
+
if len(dataset.get_measures()) == 0:
|
|
415
|
+
raise SemanticError("1-1-1-8", op=cls.op, name=dataset.name)
|
|
416
|
+
for measure in dataset.get_measures():
|
|
417
|
+
cls.type_validation(measure.data_type, scalar_set.data_type)
|
|
418
|
+
result_components = {
|
|
419
|
+
comp_name: copy(comp)
|
|
420
|
+
for comp_name, comp in dataset.components.items()
|
|
421
|
+
if comp.role in [Role.IDENTIFIER, Role.MEASURE]
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
|
|
425
|
+
cls.apply_return_type_dataset(result_dataset, dataset, scalar_set)
|
|
426
|
+
return result_dataset
|
|
427
|
+
|
|
428
|
+
@classmethod
|
|
429
|
+
def component_set_validation(
|
|
430
|
+
cls, component: DataComponent, scalar_set: ScalarSet
|
|
431
|
+
) -> DataComponent:
|
|
432
|
+
comp_name = VirtualCounter._new_dc_name()
|
|
433
|
+
cls.type_validation(component.data_type, scalar_set.data_type)
|
|
434
|
+
result = DataComponent(
|
|
435
|
+
name=comp_name,
|
|
436
|
+
data_type=cls.type_validation(component.data_type, scalar_set.data_type),
|
|
437
|
+
data=None,
|
|
438
|
+
role=Role.MEASURE,
|
|
439
|
+
nullable=component.nullable,
|
|
440
|
+
)
|
|
441
|
+
return result
|
|
442
|
+
|
|
443
|
+
@classmethod
|
|
444
|
+
def scalar_set_validation(cls, scalar: Scalar, scalar_set: ScalarSet) -> Scalar:
|
|
445
|
+
cls.type_validation(scalar.data_type, scalar_set.data_type)
|
|
446
|
+
return Scalar(
|
|
447
|
+
name="result",
|
|
448
|
+
data_type=cls.type_validation(scalar.data_type, scalar_set.data_type),
|
|
449
|
+
value=None,
|
|
450
|
+
)
|
|
451
|
+
|
|
452
|
+
# The following class method implements the type promotion
|
|
453
|
+
@classmethod
|
|
454
|
+
def type_validation(cls, left_type: Any, right_type: Any) -> Any:
|
|
455
|
+
"""
|
|
456
|
+
Validates the compatibility between the types of the operands and the operator
|
|
457
|
+
and give us the result ScalarType of the promotion
|
|
458
|
+
(implicit type promotion : binary_implicit_type_promotion)
|
|
459
|
+
|
|
460
|
+
:param left_type: The left operand data type
|
|
461
|
+
:param right_type: The right operand data type
|
|
462
|
+
|
|
463
|
+
:return: result ScalarType or exception
|
|
464
|
+
"""
|
|
465
|
+
|
|
466
|
+
return binary_implicit_promotion(left_type, right_type, cls.type_to_check, cls.return_type)
|
|
467
|
+
|
|
468
|
+
# The following class method checks the type promotion
|
|
469
|
+
@classmethod
|
|
470
|
+
def validate_type_compatibility(cls, left: Any, right: Any) -> bool:
|
|
471
|
+
"""
|
|
472
|
+
Validates the compatibility between the types of the operands and the operator
|
|
473
|
+
(implicit type promotion : check_binary_implicit_type_promotion)
|
|
474
|
+
|
|
475
|
+
:param left: The left operand
|
|
476
|
+
:param right: The right operand
|
|
477
|
+
|
|
478
|
+
:return: True if the types are compatible, False otherwise
|
|
479
|
+
"""
|
|
480
|
+
|
|
481
|
+
return check_binary_implicit_promotion(left, right, cls.type_to_check, cls.return_type)
|
|
482
|
+
|
|
483
|
+
    @classmethod
    def apply_return_type_dataset(
        cls, result_dataset: Dataset, left_operand: Any, right_operand: Any
    ) -> None:
        """
        Used in dataset validation: applies the operator's promoted data type
        to every measure of ``result_dataset`` in place.

        On a mono-measure dataset whose measure changed type, the measure is
        replaced by the default generic Measure for the new type
        (``COMP_NAME_MAPPING``). A multi-measure dataset whose measure would
        change type is an error unless the operator is in
        ``MONOMEASURE_CHANGED_ALLOWED``.
        """

        changed_allowed = cls.op in MONOMEASURE_CHANGED_ALLOWED
        is_mono_measure = len(result_dataset.get_measures()) == 1
        for measure in result_dataset.get_measures():
            left_type = left_operand.get_component(measure.name).data_type
            # Scalar/ScalarSet right operands carry a single type; datasets
            # contribute the matching component's type.
            if isinstance(right_operand, (ScalarSet, Scalar)):
                right_type = right_operand.data_type
            else:
                right_type = right_operand.get_component(measure.name).data_type

            result_data_type = cls.type_validation(left_type, right_type)
            if is_mono_measure and left_type.promotion_changed_type(result_data_type):
                # Swap the measure for the default generic one of the new type,
                # renaming the data column to match if data is present.
                component = Component(
                    name=COMP_NAME_MAPPING[result_data_type],
                    data_type=result_data_type,
                    role=Role.MEASURE,
                    nullable=measure.nullable,
                )
                result_dataset.delete_component(measure.name)
                result_dataset.add_component(component)
                if result_dataset.data is not None:
                    result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
            elif (
                changed_allowed is False
                and is_mono_measure is False
                and left_type.promotion_changed_type(result_data_type)
            ):
                raise SemanticError("1-1-1-4", op=cls.op)
            else:
                measure.data_type = result_data_type
|
|
522
|
+
|
|
523
|
+
    @classmethod
    def dataset_evaluation(cls, left_operand: Dataset, right_operand: Dataset) -> Dataset:
        """
        Evaluate the operator between two datasets: validate, inner-merge the
        data on the shared identifiers, apply the operator measure by
        measure, and drop attribute columns from the result.
        """
        result_dataset = cls.dataset_validation(left_operand, right_operand)

        # The operand with fewer identifiers becomes the merge base (mirrors
        # dataset_validation's component choice).
        use_right_as_base = False
        if len(left_operand.get_identifiers_names()) < len(right_operand.get_identifiers_names()):
            use_right_as_base = True
            base_operand_data = right_operand.data
            other_operand_data = left_operand.data
        else:
            base_operand_data = left_operand.data
            other_operand_data = right_operand.data

        join_keys = list(
            set(left_operand.get_identifiers_names()).intersection(
                right_operand.get_identifiers_names()
            )
        )

        # Normalise join-key values in place so the merge matches across types.
        # NOTE(review): the components passed are always (left, right) while
        # the dataframes are (base, other) — when the right operand is the
        # base, each component is paired with the OTHER operand's data;
        # confirm this pairing is intended.
        for join_key in join_keys:
            _id_type_promotion_join_keys(
                left_operand.get_component(join_key),
                right_operand.get_component(join_key),
                join_key,
                base_operand_data,
                other_operand_data,
            )

        try:
            # Merge the data
            if base_operand_data is None or other_operand_data is None:
                result_data: pd.DataFrame = pd.DataFrame()
            else:
                result_data = pd.merge(
                    base_operand_data,
                    other_operand_data,
                    how="inner",
                    on=join_keys,
                    suffixes=("_x", "_y"),
                )
        except ValueError as e:
            raise Exception(f"Error merging datasets on Binary Operator: {str(e)}")

        # Measures are the same, using left operand measures names.
        # "_x" is the base side, "_y" the other; swap back when the right
        # operand was the base so the operator sees (left, right) order.
        for measure in left_operand.get_measures():
            result_data[measure.name + "_x"] = cls.cast_time_types(
                measure.data_type, result_data[measure.name + "_x"]
            )
            result_data[measure.name + "_y"] = cls.cast_time_types(
                measure.data_type, result_data[measure.name + "_y"]
            )
            if use_right_as_base:
                result_data[measure.name] = cls.apply_operation_two_series(
                    result_data[measure.name + "_y"], result_data[measure.name + "_x"]
                )
            else:
                result_data[measure.name] = cls.apply_operation_two_series(
                    result_data[measure.name + "_x"], result_data[measure.name + "_y"]
                )
            result_data = result_data.drop([measure.name + "_x", measure.name + "_y"], axis=1)

        # Delete attributes from the result data
        attributes = list(
            set(left_operand.get_attributes_names()).union(right_operand.get_attributes_names())
        )
        for att in attributes:
            if att in result_data.columns:
                result_data = result_data.drop(att, axis=1)
            if att + "_x" in result_data.columns:
                result_data = result_data.drop(att + "_x", axis=1)
            if att + "_y" in result_data.columns:
                result_data = result_data.drop(att + "_y", axis=1)

        result_dataset.data = result_data
        cls.modify_measure_column(result_dataset)

        return result_dataset
|
|
600
|
+
|
|
601
|
+
@classmethod
|
|
602
|
+
def scalar_evaluation(cls, left_operand: Scalar, right_operand: Scalar) -> Scalar:
|
|
603
|
+
result_scalar = cls.scalar_validation(left_operand, right_operand)
|
|
604
|
+
result_scalar.value = cls.op_func(left_operand.value, right_operand.value)
|
|
605
|
+
return result_scalar
|
|
606
|
+
|
|
607
|
+
    @classmethod
    def dataset_scalar_evaluation(
        cls, dataset: Dataset, scalar: Scalar, dataset_left: bool = True
    ) -> Dataset:
        """
        Evaluate the operator between a dataset and a scalar: validate, copy
        the data, apply the operator to every measure column, keep only
        identifier and measure columns. ``dataset_left`` selects operand
        order (dataset op scalar vs scalar op dataset).
        """
        result_dataset = cls.dataset_scalar_validation(dataset, scalar)
        result_data = dataset.data.copy() if dataset.data is not None else pd.DataFrame()
        result_dataset.data = result_data

        scalar_value = cls.cast_time_types_scalar(scalar.data_type, scalar.value)

        for measure in dataset.get_measures():
            measure_data = cls.cast_time_types(measure.data_type, result_data[measure.name].copy())
            # Duration scalars are mapped to their period index once; after the
            # first conversion scalar_value is an int and the guard skips.
            if (
                measure.data_type.__name__.__str__() == "Duration"
                and not isinstance(scalar_value, int)
                and scalar_value is not None
            ):
                scalar_value = PERIOD_IND_MAPPING[scalar_value]
            result_dataset.data[measure.name] = cls.apply_operation_series_scalar(
                measure_data, scalar_value, dataset_left
            )

        # Redundant: result_dataset.data already references result_data (the
        # loop above wrote into the same frame); kept as in the original.
        result_dataset.data = result_data
        cols_to_keep = dataset.get_identifiers_names() + dataset.get_measures_names()
        result_dataset.data = result_dataset.data[cols_to_keep]
        cls.modify_measure_column(result_dataset)
        return result_dataset
|
|
634
|
+
|
|
635
|
+
@classmethod
|
|
636
|
+
def component_evaluation(
|
|
637
|
+
cls, left_operand: DataComponent, right_operand: DataComponent
|
|
638
|
+
) -> DataComponent:
|
|
639
|
+
result_component = cls.component_validation(left_operand, right_operand)
|
|
640
|
+
left_data = cls.cast_time_types(
|
|
641
|
+
left_operand.data_type,
|
|
642
|
+
left_operand.data.copy() if left_operand.data is not None else pd.Series(),
|
|
643
|
+
)
|
|
644
|
+
right_data = cls.cast_time_types(
|
|
645
|
+
right_operand.data_type,
|
|
646
|
+
(right_operand.data.copy() if right_operand.data is not None else pd.Series()),
|
|
647
|
+
)
|
|
648
|
+
result_component.data = cls.apply_operation_two_series(left_data, right_data)
|
|
649
|
+
return result_component
|
|
650
|
+
|
|
651
|
+
@classmethod
|
|
652
|
+
def component_scalar_evaluation(
|
|
653
|
+
cls, component: DataComponent, scalar: Scalar, component_left: bool = True
|
|
654
|
+
) -> DataComponent:
|
|
655
|
+
result_component = cls.component_scalar_validation(component, scalar)
|
|
656
|
+
comp_data = cls.cast_time_types(
|
|
657
|
+
component.data_type,
|
|
658
|
+
component.data.copy() if component.data is not None else pd.Series(),
|
|
659
|
+
)
|
|
660
|
+
scalar_value = cls.cast_time_types_scalar(scalar.data_type, scalar.value)
|
|
661
|
+
if (
|
|
662
|
+
component.data_type.__name__.__str__() == "Duration"
|
|
663
|
+
and not isinstance(scalar_value, int)
|
|
664
|
+
and scalar_value is not None
|
|
665
|
+
):
|
|
666
|
+
scalar_value = PERIOD_IND_MAPPING[scalar_value]
|
|
667
|
+
result_component.data = cls.apply_operation_series_scalar(
|
|
668
|
+
comp_data, scalar_value, component_left
|
|
669
|
+
)
|
|
670
|
+
return result_component
|
|
671
|
+
|
|
672
|
+
@classmethod
|
|
673
|
+
def dataset_set_evaluation(cls, dataset: Dataset, scalar_set: ScalarSet) -> Dataset:
|
|
674
|
+
result_dataset = cls.dataset_set_validation(dataset, scalar_set)
|
|
675
|
+
result_data = dataset.data.copy() if dataset.data is not None else pd.DataFrame()
|
|
676
|
+
|
|
677
|
+
for measure_name in dataset.get_measures_names():
|
|
678
|
+
if dataset.data is not None:
|
|
679
|
+
result_data[measure_name] = cls.apply_operation_two_series(
|
|
680
|
+
dataset.data[measure_name], scalar_set
|
|
681
|
+
)
|
|
682
|
+
|
|
683
|
+
cols_to_keep = dataset.get_identifiers_names() + dataset.get_measures_names()
|
|
684
|
+
result_dataset.data = result_data[cols_to_keep]
|
|
685
|
+
cls.modify_measure_column(result_dataset)
|
|
686
|
+
|
|
687
|
+
return result_dataset
|
|
688
|
+
|
|
689
|
+
@classmethod
|
|
690
|
+
def component_set_evaluation(
|
|
691
|
+
cls, component: DataComponent, scalar_set: ScalarSet
|
|
692
|
+
) -> DataComponent:
|
|
693
|
+
result_component = cls.component_set_validation(component, scalar_set)
|
|
694
|
+
result_component.data = cls.apply_operation_two_series(
|
|
695
|
+
component.data.copy() if component.data is not None else pd.Series(),
|
|
696
|
+
scalar_set,
|
|
697
|
+
)
|
|
698
|
+
return result_component
|
|
699
|
+
|
|
700
|
+
@classmethod
|
|
701
|
+
def scalar_set_evaluation(cls, scalar: Scalar, scalar_set: ScalarSet) -> Scalar:
|
|
702
|
+
result_scalar = cls.scalar_set_validation(scalar, scalar_set)
|
|
703
|
+
result_scalar.value = cls.op_func(scalar.value, scalar_set)
|
|
704
|
+
return result_scalar
|
|
705
|
+
|
|
706
|
+
    @classmethod
    def evaluate(cls, left_operand: Any, right_operand: Any) -> Any:
        """
        Evaluate the operation (based on validation output)
        :param left_operand: The left operand
        :param right_operand: The right operand
        :return: The result of the operation
        """
        # Dispatch by operand model types. The order of the isinstance checks is
        # significant and must be preserved; each branch delegates to the matching
        # *_evaluation helper (which validates first).
        if isinstance(left_operand, Dataset) and isinstance(right_operand, Dataset):
            return cls.dataset_evaluation(left_operand, right_operand)
        if isinstance(left_operand, Scalar) and isinstance(right_operand, Scalar):
            return cls.scalar_evaluation(left_operand, right_operand)
        if isinstance(left_operand, Dataset) and isinstance(right_operand, Scalar):
            return cls.dataset_scalar_evaluation(left_operand, right_operand, dataset_left=True)
        if isinstance(left_operand, Scalar) and isinstance(right_operand, Dataset):
            # Operands are swapped so the dataset is always the first argument;
            # dataset_left=False records the original orientation.
            return cls.dataset_scalar_evaluation(right_operand, left_operand, dataset_left=False)
        if isinstance(left_operand, DataComponent) and isinstance(right_operand, DataComponent):
            return cls.component_evaluation(left_operand, right_operand)
        if isinstance(left_operand, DataComponent) and isinstance(right_operand, Scalar):
            return cls.component_scalar_evaluation(left_operand, right_operand, component_left=True)
        if isinstance(left_operand, Scalar) and isinstance(right_operand, DataComponent):
            return cls.component_scalar_evaluation(
                right_operand, left_operand, component_left=False
            )
        if isinstance(left_operand, Dataset) and isinstance(right_operand, ScalarSet):
            return cls.dataset_set_evaluation(left_operand, right_operand)
        if isinstance(left_operand, DataComponent) and isinstance(right_operand, ScalarSet):
            return cls.component_set_evaluation(left_operand, right_operand)
        if isinstance(left_operand, Scalar) and isinstance(right_operand, ScalarSet):
            return cls.scalar_set_evaluation(left_operand, right_operand)
        # NOTE(review): falls through and implicitly returns None when no type
        # combination matches — confirm callers handle that (or whether an
        # explicit error would be more appropriate).
class Unary(Operator):
|
|
740
|
+
@classmethod
|
|
741
|
+
def op_func(cls, *args: Any) -> Any:
|
|
742
|
+
x = args[0]
|
|
743
|
+
|
|
744
|
+
return None if pd.isnull(x) else cls.py_op(x)
|
|
745
|
+
|
|
746
|
+
@classmethod
|
|
747
|
+
def apply_operation_component(cls, series: Any) -> Any:
|
|
748
|
+
"""
|
|
749
|
+
Applies the operation to a component
|
|
750
|
+
"""
|
|
751
|
+
|
|
752
|
+
return series.map(cls.py_op, na_action="ignore")
|
|
753
|
+
|
|
754
|
+
@classmethod
|
|
755
|
+
def validate(cls, operand: Any) -> Any:
|
|
756
|
+
"""
|
|
757
|
+
The main function for validate, applies the implicit promotion (or check it), and
|
|
758
|
+
can do a semantic check too.
|
|
759
|
+
Returns an operand.
|
|
760
|
+
"""
|
|
761
|
+
|
|
762
|
+
if isinstance(operand, Dataset):
|
|
763
|
+
return cls.dataset_validation(operand)
|
|
764
|
+
elif isinstance(operand, DataComponent):
|
|
765
|
+
return cls.component_validation(operand)
|
|
766
|
+
elif isinstance(operand, Scalar):
|
|
767
|
+
return cls.scalar_validation(operand)
|
|
768
|
+
|
|
769
|
+
@classmethod
|
|
770
|
+
def dataset_validation(cls, operand: Dataset) -> Dataset:
|
|
771
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
772
|
+
cls.validate_dataset_type(operand)
|
|
773
|
+
if len(operand.get_measures()) == 0:
|
|
774
|
+
raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
|
|
775
|
+
result_components = {
|
|
776
|
+
comp_name: copy(comp)
|
|
777
|
+
for comp_name, comp in operand.components.items()
|
|
778
|
+
if comp.role in [Role.IDENTIFIER, Role.MEASURE]
|
|
779
|
+
}
|
|
780
|
+
|
|
781
|
+
result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
|
|
782
|
+
cls.apply_return_type_dataset(result_dataset, operand)
|
|
783
|
+
return result_dataset
|
|
784
|
+
|
|
785
|
+
@classmethod
|
|
786
|
+
def scalar_validation(cls, operand: Scalar) -> Scalar:
|
|
787
|
+
result_type = cls.type_validation(operand.data_type)
|
|
788
|
+
result = Scalar(name="result", data_type=result_type, value=None)
|
|
789
|
+
return result
|
|
790
|
+
|
|
791
|
+
@classmethod
|
|
792
|
+
def component_validation(cls, operand: DataComponent) -> DataComponent:
|
|
793
|
+
comp_name = VirtualCounter._new_dc_name()
|
|
794
|
+
result_type = cls.type_validation(operand.data_type)
|
|
795
|
+
result = DataComponent(
|
|
796
|
+
name=comp_name,
|
|
797
|
+
data_type=result_type,
|
|
798
|
+
data=None,
|
|
799
|
+
role=operand.role,
|
|
800
|
+
nullable=operand.nullable,
|
|
801
|
+
)
|
|
802
|
+
return result
|
|
803
|
+
|
|
804
|
+
# The following class method implements the type promotion
|
|
805
|
+
@classmethod
|
|
806
|
+
def type_validation(cls, operand: Any) -> Any:
|
|
807
|
+
return unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
|
|
808
|
+
|
|
809
|
+
# The following class method checks the type promotion
|
|
810
|
+
@classmethod
|
|
811
|
+
def validate_type_compatibility(cls, operand: Any) -> bool:
|
|
812
|
+
return check_unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
|
|
813
|
+
|
|
814
|
+
@classmethod
|
|
815
|
+
def validate_dataset_type(cls, dataset: Dataset) -> None:
|
|
816
|
+
if cls.type_to_check is not None:
|
|
817
|
+
for measure in dataset.get_measures():
|
|
818
|
+
if not cls.validate_type_compatibility(measure.data_type):
|
|
819
|
+
raise SemanticError(
|
|
820
|
+
"1-1-1-3",
|
|
821
|
+
op=cls.op,
|
|
822
|
+
entity=measure.role.value,
|
|
823
|
+
name=measure.name,
|
|
824
|
+
target_type=SCALAR_TYPES_CLASS_REVERSE[cls.type_to_check],
|
|
825
|
+
)
|
|
826
|
+
|
|
827
|
+
@classmethod
|
|
828
|
+
def validate_scalar_type(cls, scalar: Scalar) -> None:
|
|
829
|
+
if cls.type_to_check is not None and not cls.validate_type_compatibility(scalar.data_type):
|
|
830
|
+
raise SemanticError(
|
|
831
|
+
"1-1-1-5",
|
|
832
|
+
op=cls.op,
|
|
833
|
+
name=scalar.name,
|
|
834
|
+
type=SCALAR_TYPES_CLASS_REVERSE[scalar.data_type],
|
|
835
|
+
)
|
|
836
|
+
|
|
837
|
+
    @classmethod
    def apply_return_type_dataset(cls, result_dataset: Dataset, operand: Dataset) -> None:
        """
        Propagate the operator's return type onto *result_dataset*'s measures,
        mutating it in place.

        For a mono-measure dataset whose measure type changes under promotion,
        the measure is renamed to the canonical name for the new type (and the
        data column renamed accordingly). For multi-measure datasets a type
        change is only allowed for operators listed in MONOMEASURE_CHANGED_ALLOWED;
        otherwise SemanticError 1-1-1-4 is raised.
        """
        changed_allowed = cls.op in MONOMEASURE_CHANGED_ALLOWED
        is_mono_measure = len(operand.get_measures()) == 1
        for measure in result_dataset.get_measures():
            operand_type = operand.get_component(measure.name).data_type

            result_data_type = cls.type_validation(operand_type)
            if is_mono_measure and operand_type.promotion_changed_type(result_data_type):
                # Replace the single measure with the canonical component for the
                # promoted type, keeping the original nullability.
                component = Component(
                    name=COMP_NAME_MAPPING[result_data_type],
                    data_type=result_data_type,
                    role=Role.MEASURE,
                    nullable=measure.nullable,
                )
                result_dataset.delete_component(measure.name)
                result_dataset.add_component(component)
                if result_dataset.data is not None:
                    result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
            elif (
                changed_allowed is False
                and is_mono_measure is False
                and operand_type.promotion_changed_type(result_data_type)
            ):
                # Multi-measure type change is forbidden for this operator.
                raise SemanticError("1-1-1-4", op=cls.op)
            else:
                # Type unchanged (or change allowed): just update in place.
                measure.data_type = result_data_type
@classmethod
|
|
866
|
+
def evaluate(cls, operand: ALL_MODEL_DATA_TYPES) -> Any:
|
|
867
|
+
if isinstance(operand, Dataset):
|
|
868
|
+
return cls.dataset_evaluation(operand)
|
|
869
|
+
if isinstance(operand, Scalar):
|
|
870
|
+
return cls.scalar_evaluation(operand)
|
|
871
|
+
if isinstance(operand, DataComponent):
|
|
872
|
+
return cls.component_evaluation(operand)
|
|
873
|
+
|
|
874
|
+
@classmethod
|
|
875
|
+
def dataset_evaluation(cls, operand: Dataset) -> Dataset:
|
|
876
|
+
result_dataset = cls.dataset_validation(operand)
|
|
877
|
+
result_data = operand.data.copy() if operand.data is not None else pd.DataFrame()
|
|
878
|
+
for measure_name in operand.get_measures_names():
|
|
879
|
+
result_data[measure_name] = cls.apply_operation_component(result_data[measure_name])
|
|
880
|
+
|
|
881
|
+
cols_to_keep = operand.get_identifiers_names() + operand.get_measures_names()
|
|
882
|
+
result_data = result_data[cols_to_keep]
|
|
883
|
+
|
|
884
|
+
result_dataset.data = result_data
|
|
885
|
+
cls.modify_measure_column(result_dataset)
|
|
886
|
+
return result_dataset
|
|
887
|
+
|
|
888
|
+
@classmethod
|
|
889
|
+
def scalar_evaluation(cls, operand: Scalar) -> Scalar:
|
|
890
|
+
result_scalar = cls.scalar_validation(operand)
|
|
891
|
+
result_scalar.value = cls.op_func(operand.value)
|
|
892
|
+
return result_scalar
|
|
893
|
+
|
|
894
|
+
@classmethod
|
|
895
|
+
def component_evaluation(cls, operand: DataComponent) -> DataComponent:
|
|
896
|
+
result_component = cls.component_validation(operand)
|
|
897
|
+
result_component.data = cls.apply_operation_component(
|
|
898
|
+
operand.data.copy() if operand.data is not None else pd.Series()
|
|
899
|
+
)
|
|
900
|
+
return result_component
|