vtlengine 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +153 -100
- vtlengine/API/__init__.py +109 -67
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +8 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/lexer.py +732 -142
- vtlengine/AST/Grammar/parser.py +2188 -826
- vtlengine/AST/Grammar/tokens.py +128 -128
- vtlengine/AST/VtlVisitor.py +7 -4
- vtlengine/AST/__init__.py +22 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +194 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +52 -27
- vtlengine/Exceptions/messages.py +134 -62
- vtlengine/Interpreter/__init__.py +781 -487
- vtlengine/Model/__init__.py +165 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +115 -59
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +149 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +89 -44
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +334 -216
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +195 -40
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +77 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +46 -37
- vtlengine-1.0.1.dist-info/METADATA +236 -0
- vtlengine-1.0.1.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
vtlengine/Operators/__init__.py
CHANGED
|
@@ -1,19 +1,29 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from copy import copy
|
|
3
|
-
from typing import Any, Union
|
|
4
|
-
|
|
5
|
-
from vtlengine.DataTypes import
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
3
|
+
from typing import Any, Union, Optional
|
|
4
|
+
|
|
5
|
+
from vtlengine.DataTypes import (
|
|
6
|
+
COMP_NAME_MAPPING,
|
|
7
|
+
binary_implicit_promotion,
|
|
8
|
+
check_binary_implicit_promotion,
|
|
9
|
+
check_unary_implicit_promotion,
|
|
10
|
+
unary_implicit_promotion,
|
|
11
|
+
SCALAR_TYPES_CLASS_REVERSE,
|
|
12
|
+
)
|
|
13
|
+
from vtlengine.DataTypes.TimeHandling import (
|
|
14
|
+
TimeIntervalHandler,
|
|
15
|
+
TimePeriodHandler,
|
|
16
|
+
DURATION_MAPPING,
|
|
17
|
+
)
|
|
9
18
|
|
|
10
19
|
from vtlengine.AST.Grammar.tokens import CEIL, FLOOR, ROUND, EQ, NEQ, GT, GTE, LT, LTE, XOR, OR, AND
|
|
11
20
|
from vtlengine.Exceptions import SemanticError
|
|
12
21
|
|
|
13
|
-
if os.environ.get("SPARK", False):
|
|
14
|
-
|
|
15
|
-
else:
|
|
16
|
-
|
|
22
|
+
# if os.environ.get("SPARK", False):
|
|
23
|
+
# import pyspark.pandas as pd
|
|
24
|
+
# else:
|
|
25
|
+
# import pandas as pd
|
|
26
|
+
import pandas as pd
|
|
17
27
|
|
|
18
28
|
from vtlengine.Model import Component, Dataset, Role, Scalar, DataComponent, ScalarSet
|
|
19
29
|
|
|
@@ -31,35 +41,36 @@ only_semantic = False
|
|
|
31
41
|
|
|
32
42
|
class Operator:
|
|
33
43
|
"""Superclass for all operators"""
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
44
|
+
|
|
45
|
+
op: Any = None
|
|
46
|
+
py_op: Any = None
|
|
47
|
+
spark_op: Any = None
|
|
48
|
+
type_to_check: Any = None
|
|
49
|
+
return_type: Any = None
|
|
39
50
|
|
|
40
51
|
@classmethod
|
|
41
|
-
def analyze(cls, *args, **kwargs):
|
|
52
|
+
def analyze(cls, *args: Any, **kwargs: Any) -> Any:
|
|
42
53
|
if only_semantic:
|
|
43
54
|
return cls.validate(*args, **kwargs)
|
|
44
55
|
return cls.evaluate(*args, **kwargs)
|
|
45
56
|
|
|
46
57
|
@classmethod
|
|
47
|
-
def cast_time_types(cls, data_type:
|
|
58
|
+
def cast_time_types(cls, data_type: Any, series: Any) -> Any:
|
|
59
|
+
|
|
48
60
|
if cls.op not in BINARY_COMPARISON_OPERATORS:
|
|
49
61
|
return series
|
|
50
62
|
if data_type.__name__ == "TimeInterval":
|
|
51
|
-
series = series.map(
|
|
52
|
-
|
|
63
|
+
series = series.map(
|
|
64
|
+
lambda x: TimeIntervalHandler.from_iso_format(x), na_action="ignore"
|
|
65
|
+
)
|
|
53
66
|
elif data_type.__name__ == "TimePeriod":
|
|
54
|
-
series = series.map(lambda x: TimePeriodHandler(x),
|
|
55
|
-
na_action='ignore')
|
|
67
|
+
series = series.map(lambda x: TimePeriodHandler(x), na_action="ignore")
|
|
56
68
|
elif data_type.__name__ == "Duration":
|
|
57
|
-
series = series.map(lambda x: DURATION_MAPPING[x],
|
|
58
|
-
na_action='ignore')
|
|
69
|
+
series = series.map(lambda x: DURATION_MAPPING[x], na_action="ignore")
|
|
59
70
|
return series
|
|
60
71
|
|
|
61
72
|
@classmethod
|
|
62
|
-
def cast_time_types_scalar(cls, data_type:
|
|
73
|
+
def cast_time_types_scalar(cls, data_type: Any, value: str) -> Any:
|
|
63
74
|
if cls.op not in BINARY_COMPARISON_OPERATORS:
|
|
64
75
|
return value
|
|
65
76
|
if data_type.__name__ == "TimeInterval":
|
|
@@ -75,55 +86,59 @@ class Operator:
|
|
|
75
86
|
@classmethod
|
|
76
87
|
def modify_measure_column(cls, result: Dataset) -> None:
|
|
77
88
|
"""
|
|
78
|
-
If an Operator change the data type of the Variable it is applied to (e.g., from string to
|
|
79
|
-
the result Data Set cannot maintain this Variable as it happens in the previous
|
|
80
|
-
because a Variable cannot have different data types in different Data Sets.
|
|
81
|
-
As a consequence, the converted variable cannot follow the same rules described in the
|
|
82
|
-
in the result Data Set, by another Variable of the
|
|
83
|
-
|
|
84
|
-
|
|
89
|
+
If an Operator change the data type of the Variable it is applied to (e.g., from string to
|
|
90
|
+
number), the result Data Set cannot maintain this Variable as it happens in the previous
|
|
91
|
+
cases, because a Variable cannot have different data types in different Data Sets.
|
|
92
|
+
As a consequence, the converted variable cannot follow the same rules described in the
|
|
93
|
+
sections above and must be replaced, in the result Data Set, by another Variable of the
|
|
94
|
+
proper data type.
|
|
95
|
+
For sake of simplicity, the operators changing the data type are allowed only on
|
|
96
|
+
mono-measure operand Data Sets, so that the conversion happens on just one Measure.
|
|
97
|
+
A default generic Measure is assigned by default to the result Data Set, depending on the
|
|
98
|
+
data type of the result (the default Measure Variables are reported in the table below).
|
|
85
99
|
|
|
86
100
|
Function used by the evaluate function when a dataset is involved
|
|
87
101
|
"""
|
|
88
|
-
|
|
102
|
+
|
|
103
|
+
if len(result.get_measures()) == 1 and cls.return_type is not None and result is not None:
|
|
89
104
|
measure_name = result.get_measures_names()[0]
|
|
90
105
|
components = list(result.components.keys())
|
|
91
|
-
columns = list(result.data.columns)
|
|
106
|
+
columns = list(result.data.columns) if result.data is not None else []
|
|
92
107
|
for column in columns:
|
|
93
|
-
if column not in set(components):
|
|
108
|
+
if column not in set(components) and result.data is not None:
|
|
94
109
|
result.data[measure_name] = result.data[column]
|
|
95
110
|
del result.data[column]
|
|
96
111
|
|
|
97
112
|
@classmethod
|
|
98
|
-
def validate_dataset_type(cls,
|
|
113
|
+
def validate_dataset_type(cls, *args: Any) -> None:
|
|
99
114
|
raise Exception("Method should be implemented by inheritors")
|
|
100
115
|
|
|
101
116
|
@classmethod
|
|
102
|
-
def validate_component_type(cls,
|
|
117
|
+
def validate_component_type(cls, *args: Any) -> None:
|
|
103
118
|
raise Exception("Method should be implemented by inheritors")
|
|
104
119
|
|
|
105
120
|
@classmethod
|
|
106
|
-
def validate_scalar_type(cls,
|
|
121
|
+
def validate_scalar_type(cls, *args: Any) -> None:
|
|
107
122
|
raise Exception("Method should be implemented by inheritors")
|
|
108
123
|
|
|
109
124
|
@classmethod
|
|
110
|
-
def validate(cls, *args, **kwargs):
|
|
125
|
+
def validate(cls, *args: Any, **kwargs: Any) -> Any:
|
|
111
126
|
raise Exception("Method should be implemented by inheritors")
|
|
112
127
|
|
|
113
128
|
@classmethod
|
|
114
|
-
def evaluate(cls, *args, **kwargs):
|
|
129
|
+
def evaluate(cls, *args: Any, **kwargs: Any) -> Any:
|
|
115
130
|
raise Exception("Method should be implemented by inheritors")
|
|
116
131
|
|
|
117
132
|
@classmethod
|
|
118
|
-
def scalar_validation(cls, *args) ->
|
|
133
|
+
def scalar_validation(cls, *args: Any) -> Any:
|
|
119
134
|
raise Exception("Method should be implemented by inheritors")
|
|
120
135
|
|
|
121
136
|
@classmethod
|
|
122
|
-
def component_validation(cls, *args) ->
|
|
137
|
+
def component_validation(cls, *args: Any) -> Any:
|
|
123
138
|
raise Exception("Method should be implemented by inheritors")
|
|
124
139
|
|
|
125
140
|
@classmethod
|
|
126
|
-
def validate_type_compatibility(cls, *args) -> bool:
|
|
141
|
+
def validate_type_compatibility(cls, *args: Any) -> bool:
|
|
127
142
|
if len(args) == 1:
|
|
128
143
|
operand = args[0]
|
|
129
144
|
return check_unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
|
|
@@ -133,7 +148,7 @@ class Operator:
|
|
|
133
148
|
raise Exception("Method should be implemented by inheritors")
|
|
134
149
|
|
|
135
150
|
@classmethod
|
|
136
|
-
def type_validation(cls, *args) ->
|
|
151
|
+
def type_validation(cls, *args: Any) -> Any:
|
|
137
152
|
if len(args) == 1:
|
|
138
153
|
operand = args[0]
|
|
139
154
|
return unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
|
|
@@ -143,26 +158,37 @@ class Operator:
|
|
|
143
158
|
raise Exception("Method should be implemented by inheritors")
|
|
144
159
|
|
|
145
160
|
@classmethod
|
|
146
|
-
def apply_return_type_dataset(cls, *args) -> None:
|
|
161
|
+
def apply_return_type_dataset(cls, *args: Any) -> None:
|
|
147
162
|
raise Exception("Method should be implemented by inheritors")
|
|
148
163
|
|
|
149
164
|
@classmethod
|
|
150
|
-
def apply_return_type(cls, *args):
|
|
165
|
+
def apply_return_type(cls, *args: Any) -> None:
|
|
151
166
|
raise Exception("Method should be implemented by inheritors")
|
|
152
167
|
|
|
153
168
|
|
|
154
|
-
def _id_type_promotion_join_keys(
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
169
|
+
def _id_type_promotion_join_keys(
|
|
170
|
+
c_left: Component,
|
|
171
|
+
c_right: Component,
|
|
172
|
+
join_key: str,
|
|
173
|
+
left_data: Optional[pd.DataFrame] = None,
|
|
174
|
+
right_data: Optional[pd.DataFrame] = None,
|
|
175
|
+
) -> None:
|
|
176
|
+
|
|
177
|
+
if left_data is None:
|
|
178
|
+
left_data = pd.DataFrame()
|
|
179
|
+
if right_data is None:
|
|
180
|
+
right_data = pd.DataFrame()
|
|
181
|
+
|
|
182
|
+
left_type_name: str = str(c_left.data_type.__name__)
|
|
183
|
+
right_type_name: str = str(c_right.data_type.__name__)
|
|
159
184
|
|
|
160
185
|
if left_type_name == right_type_name or len(left_data) == 0 or len(right_data) == 0:
|
|
161
186
|
left_data[join_key] = left_data[join_key].astype(object)
|
|
162
187
|
right_data[join_key] = right_data[join_key].astype(object)
|
|
163
188
|
return
|
|
164
|
-
if (
|
|
165
|
-
|
|
189
|
+
if (left_type_name == "Integer" and right_type_name == "Number") or (
|
|
190
|
+
left_type_name == "Number" and right_type_name == "Integer"
|
|
191
|
+
):
|
|
166
192
|
left_data[join_key] = left_data[join_key].map(lambda x: int(float(x)))
|
|
167
193
|
right_data[join_key] = right_data[join_key].map(lambda x: int(float(x)))
|
|
168
194
|
elif left_type_name == "String" and right_type_name in ("Integer", "Number"):
|
|
@@ -173,7 +199,8 @@ def _id_type_promotion_join_keys(c_left: Component, c_right: Component, join_key
|
|
|
173
199
|
right_data[join_key] = right_data[join_key].astype(object)
|
|
174
200
|
|
|
175
201
|
|
|
176
|
-
def _handle_str_number(x: Union[str, int, float]) -> Union[int, float]:
|
|
202
|
+
def _handle_str_number(x: Union[str, int, float]) -> Union[str, int, float]:
|
|
203
|
+
|
|
177
204
|
if isinstance(x, int):
|
|
178
205
|
return x
|
|
179
206
|
try:
|
|
@@ -188,15 +215,16 @@ def _handle_str_number(x: Union[str, int, float]) -> Union[int, float]:
|
|
|
188
215
|
class Binary(Operator):
|
|
189
216
|
|
|
190
217
|
@classmethod
|
|
191
|
-
def op_func(cls,
|
|
218
|
+
def op_func(cls, *args: Any) -> Any:
|
|
219
|
+
x, y = args
|
|
220
|
+
|
|
192
221
|
if pd.isnull(x) or pd.isnull(y):
|
|
193
222
|
return None
|
|
194
223
|
return cls.py_op(x, y)
|
|
195
224
|
|
|
196
225
|
@classmethod
|
|
197
|
-
def apply_operation_two_series(cls,
|
|
198
|
-
|
|
199
|
-
right_series: Any) -> Any:
|
|
226
|
+
def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
|
|
227
|
+
|
|
200
228
|
if os.getenv("SPARK", False):
|
|
201
229
|
if cls.spark_op is None:
|
|
202
230
|
cls.spark_op = cls.py_op
|
|
@@ -206,26 +234,32 @@ class Binary(Operator):
|
|
|
206
234
|
result.loc[nulls] = None
|
|
207
235
|
return result
|
|
208
236
|
result = list(map(cls.op_func, left_series.values, right_series.values))
|
|
209
|
-
return pd.Series(result, index=list(range(len(result))),
|
|
210
|
-
dtype=object)
|
|
237
|
+
return pd.Series(result, index=list(range(len(result))), dtype=object)
|
|
211
238
|
|
|
212
239
|
@classmethod
|
|
213
|
-
def apply_operation_series_scalar(
|
|
214
|
-
|
|
240
|
+
def apply_operation_series_scalar(
|
|
241
|
+
cls,
|
|
242
|
+
series: Any,
|
|
243
|
+
scalar: Scalar,
|
|
244
|
+
series_left: bool,
|
|
245
|
+
) -> Any:
|
|
246
|
+
|
|
215
247
|
if scalar is None:
|
|
216
248
|
return pd.Series(None, index=series.index)
|
|
217
249
|
if series_left:
|
|
218
|
-
return series.map(lambda x: cls.py_op(x, scalar), na_action=
|
|
250
|
+
return series.map(lambda x: cls.py_op(x, scalar), na_action="ignore")
|
|
219
251
|
else:
|
|
220
|
-
return series.map(lambda x: cls.py_op(scalar, x), na_action=
|
|
252
|
+
return series.map(lambda x: cls.py_op(scalar, x), na_action="ignore")
|
|
221
253
|
|
|
222
254
|
@classmethod
|
|
223
|
-
def validate(cls,
|
|
255
|
+
def validate(cls, *args: Any) -> Any:
|
|
224
256
|
"""
|
|
225
257
|
The main function for validate, applies the implicit promotion (or check it), and
|
|
226
258
|
can do a semantic check too.
|
|
227
259
|
Returns an operand.
|
|
228
260
|
"""
|
|
261
|
+
left_operand, right_operand = args
|
|
262
|
+
|
|
229
263
|
if isinstance(left_operand, Dataset) and isinstance(right_operand, Dataset):
|
|
230
264
|
return cls.dataset_validation(left_operand, right_operand)
|
|
231
265
|
if isinstance(left_operand, Dataset) and isinstance(right_operand, Scalar):
|
|
@@ -234,29 +268,23 @@ class Binary(Operator):
|
|
|
234
268
|
return cls.dataset_scalar_validation(right_operand, left_operand)
|
|
235
269
|
if isinstance(left_operand, Scalar) and isinstance(right_operand, Scalar):
|
|
236
270
|
return cls.scalar_validation(left_operand, right_operand)
|
|
237
|
-
|
|
238
271
|
if isinstance(left_operand, DataComponent) and isinstance(right_operand, DataComponent):
|
|
239
272
|
return cls.component_validation(left_operand, right_operand)
|
|
240
|
-
|
|
241
273
|
if isinstance(left_operand, DataComponent) and isinstance(right_operand, Scalar):
|
|
242
274
|
return cls.component_scalar_validation(left_operand, right_operand)
|
|
243
|
-
|
|
244
275
|
if isinstance(left_operand, Scalar) and isinstance(right_operand, DataComponent):
|
|
245
276
|
return cls.component_scalar_validation(right_operand, left_operand)
|
|
246
|
-
|
|
247
277
|
# In operator
|
|
248
|
-
|
|
249
278
|
if isinstance(left_operand, Dataset) and isinstance(right_operand, ScalarSet):
|
|
250
279
|
return cls.dataset_set_validation(left_operand, right_operand)
|
|
251
|
-
|
|
252
280
|
if isinstance(left_operand, Scalar) and isinstance(right_operand, ScalarSet):
|
|
253
281
|
return cls.scalar_set_validation(left_operand, right_operand)
|
|
254
|
-
|
|
255
282
|
if isinstance(left_operand, DataComponent) and isinstance(right_operand, ScalarSet):
|
|
256
283
|
return cls.component_set_validation(left_operand, right_operand)
|
|
257
284
|
|
|
258
285
|
@classmethod
|
|
259
286
|
def dataset_validation(cls, left_operand: Dataset, right_operand: Dataset) -> Dataset:
|
|
287
|
+
|
|
260
288
|
left_identifiers = left_operand.get_identifiers_names()
|
|
261
289
|
right_identifiers = right_operand.get_identifiers_names()
|
|
262
290
|
|
|
@@ -268,8 +296,9 @@ class Binary(Operator):
|
|
|
268
296
|
right_measures_names = [measure.name for measure in right_measures]
|
|
269
297
|
|
|
270
298
|
if left_measures_names != right_measures_names:
|
|
271
|
-
raise SemanticError(
|
|
272
|
-
|
|
299
|
+
raise SemanticError(
|
|
300
|
+
"1-1-14-1", op=cls.op, left=left_measures_names, right=right_measures_names
|
|
301
|
+
)
|
|
273
302
|
elif len(left_measures) == 0:
|
|
274
303
|
raise SemanticError("1-1-1-8", op=cls.op, name=left_operand.name)
|
|
275
304
|
for left_measure, right_measure in zip(left_measures, right_measures):
|
|
@@ -288,9 +317,11 @@ class Binary(Operator):
|
|
|
288
317
|
# Deleting extra identifiers that we do not need anymore
|
|
289
318
|
|
|
290
319
|
base_operand = right_operand if use_right_components else left_operand
|
|
291
|
-
result_components = {
|
|
292
|
-
|
|
293
|
-
|
|
320
|
+
result_components = {
|
|
321
|
+
component_name: copy(component)
|
|
322
|
+
for component_name, component in base_operand.components.items()
|
|
323
|
+
if component.role in [Role.IDENTIFIER, Role.MEASURE]
|
|
324
|
+
}
|
|
294
325
|
|
|
295
326
|
for comp in [x for x in result_components.values() if x.role == Role.MEASURE]:
|
|
296
327
|
if comp.name in left_operand.components and comp.name in right_operand.components:
|
|
@@ -303,57 +334,66 @@ class Binary(Operator):
|
|
|
303
334
|
return result_dataset
|
|
304
335
|
|
|
305
336
|
@classmethod
|
|
306
|
-
def dataset_scalar_validation(cls, dataset: Dataset, scalar: Scalar):
|
|
337
|
+
def dataset_scalar_validation(cls, dataset: Dataset, scalar: Scalar) -> Dataset:
|
|
338
|
+
|
|
307
339
|
if len(dataset.get_measures()) == 0:
|
|
308
340
|
raise SemanticError("1-1-1-8", op=cls.op, name=dataset.name)
|
|
309
341
|
|
|
310
|
-
result_components = {
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
342
|
+
result_components = {
|
|
343
|
+
comp_name: copy(comp)
|
|
344
|
+
for comp_name, comp in dataset.components.items()
|
|
345
|
+
if comp.role in [Role.IDENTIFIER, Role.MEASURE]
|
|
346
|
+
}
|
|
347
|
+
result_dataset = Dataset(name="result", components=result_components, data=None)
|
|
316
348
|
cls.apply_return_type_dataset(result_dataset, dataset, scalar)
|
|
317
349
|
return result_dataset
|
|
318
350
|
|
|
319
351
|
@classmethod
|
|
320
352
|
def scalar_validation(cls, left_operand: Scalar, right_operand: Scalar) -> Scalar:
|
|
321
|
-
if not cls.validate_type_compatibility(left_operand.data_type, right_operand.data_type):
|
|
322
|
-
raise SemanticError("1-1-1-2", )
|
|
323
353
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
354
|
+
if not cls.validate_type_compatibility(left_operand.data_type, right_operand.data_type):
|
|
355
|
+
raise SemanticError(
|
|
356
|
+
"1-1-1-2",
|
|
357
|
+
)
|
|
358
|
+
return Scalar(
|
|
359
|
+
name="result",
|
|
360
|
+
data_type=cls.type_validation(left_operand.data_type, right_operand.data_type),
|
|
361
|
+
value=None,
|
|
362
|
+
)
|
|
363
|
+
|
|
364
|
+
@classmethod
|
|
365
|
+
def component_validation(
|
|
366
|
+
cls, left_operand: DataComponent, right_operand: DataComponent
|
|
367
|
+
) -> DataComponent:
|
|
332
368
|
"""
|
|
333
369
|
Validates the compatibility between the types of the components and the operator
|
|
334
370
|
:param left_operand: The left component
|
|
335
371
|
:param right_operand: The right component
|
|
336
372
|
:return: The result data type of the validation
|
|
337
373
|
"""
|
|
338
|
-
result_data_type = cls.type_validation(left_operand.data_type, right_operand.data_type)
|
|
339
374
|
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
375
|
+
result_data_type = cls.type_validation(left_operand.data_type, right_operand.data_type)
|
|
376
|
+
result = DataComponent(
|
|
377
|
+
name="result",
|
|
378
|
+
data_type=result_data_type,
|
|
379
|
+
data=None,
|
|
380
|
+
role=left_operand.role,
|
|
381
|
+
nullable=(left_operand.nullable or right_operand.nullable),
|
|
382
|
+
)
|
|
345
383
|
|
|
346
384
|
return result
|
|
347
385
|
|
|
348
386
|
@classmethod
|
|
349
|
-
def component_scalar_validation(cls, component: DataComponent, scalar: Scalar):
|
|
350
|
-
cls.type_validation(component.data_type, scalar.data_type)
|
|
351
|
-
|
|
352
|
-
result = DataComponent(name=component.name,
|
|
353
|
-
data_type=cls.type_validation(component.data_type, scalar.data_type),
|
|
354
|
-
data=None, role=component.role,
|
|
355
|
-
nullable=component.nullable or scalar is None)
|
|
387
|
+
def component_scalar_validation(cls, component: DataComponent, scalar: Scalar) -> DataComponent:
|
|
356
388
|
|
|
389
|
+
cls.type_validation(component.data_type, scalar.data_type)
|
|
390
|
+
result = DataComponent(
|
|
391
|
+
name=component.name,
|
|
392
|
+
data_type=cls.type_validation(component.data_type, scalar.data_type),
|
|
393
|
+
data=None,
|
|
394
|
+
role=component.role,
|
|
395
|
+
nullable=component.nullable or scalar is None,
|
|
396
|
+
)
|
|
357
397
|
return result
|
|
358
398
|
|
|
359
399
|
@classmethod
|
|
@@ -363,37 +403,44 @@ class Binary(Operator):
|
|
|
363
403
|
raise SemanticError("1-1-1-8", op=cls.op, name=dataset.name)
|
|
364
404
|
for measure in dataset.get_measures():
|
|
365
405
|
cls.type_validation(measure.data_type, scalar_set.data_type)
|
|
406
|
+
result_components = {
|
|
407
|
+
comp_name: copy(comp)
|
|
408
|
+
for comp_name, comp in dataset.components.items()
|
|
409
|
+
if comp.role in [Role.IDENTIFIER, Role.MEASURE]
|
|
410
|
+
}
|
|
366
411
|
|
|
367
|
-
|
|
368
|
-
dataset.components.items() if
|
|
369
|
-
comp.role in [Role.IDENTIFIER, Role.MEASURE]}
|
|
370
|
-
|
|
371
|
-
result_dataset = Dataset(name="result", components=result_components,
|
|
372
|
-
data=None)
|
|
412
|
+
result_dataset = Dataset(name="result", components=result_components, data=None)
|
|
373
413
|
cls.apply_return_type_dataset(result_dataset, dataset, scalar_set)
|
|
374
414
|
return result_dataset
|
|
375
415
|
|
|
376
416
|
@classmethod
|
|
377
|
-
def component_set_validation(
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
result = DataComponent(name="result", data_type=cls.type_validation(component.data_type,
|
|
381
|
-
scalar_set.data_type),
|
|
382
|
-
data=None,
|
|
383
|
-
role=Role.MEASURE, nullable=component.nullable)
|
|
417
|
+
def component_set_validation(
|
|
418
|
+
cls, component: DataComponent, scalar_set: ScalarSet
|
|
419
|
+
) -> DataComponent:
|
|
384
420
|
|
|
421
|
+
cls.type_validation(component.data_type, scalar_set.data_type)
|
|
422
|
+
result = DataComponent(
|
|
423
|
+
name="result",
|
|
424
|
+
data_type=cls.type_validation(component.data_type, scalar_set.data_type),
|
|
425
|
+
data=None,
|
|
426
|
+
role=Role.MEASURE,
|
|
427
|
+
nullable=component.nullable,
|
|
428
|
+
)
|
|
385
429
|
return result
|
|
386
430
|
|
|
387
431
|
@classmethod
|
|
388
|
-
def scalar_set_validation(cls, scalar: Scalar, scalar_set: ScalarSet):
|
|
432
|
+
def scalar_set_validation(cls, scalar: Scalar, scalar_set: ScalarSet) -> Scalar:
|
|
433
|
+
|
|
389
434
|
cls.type_validation(scalar.data_type, scalar_set.data_type)
|
|
390
|
-
return Scalar(
|
|
391
|
-
|
|
392
|
-
|
|
435
|
+
return Scalar(
|
|
436
|
+
name="result",
|
|
437
|
+
data_type=cls.type_validation(scalar.data_type, scalar_set.data_type),
|
|
438
|
+
value=None,
|
|
439
|
+
)
|
|
393
440
|
|
|
394
441
|
# The following class method implements the type promotion
|
|
395
442
|
@classmethod
|
|
396
|
-
def type_validation(cls, left_type:
|
|
443
|
+
def type_validation(cls, left_type: Any, right_type: Any) -> Any:
|
|
397
444
|
"""
|
|
398
445
|
Validates the compatibility between the types of the operands and the operator
|
|
399
446
|
and give us the result ScalarType of the promotion
|
|
@@ -404,11 +451,12 @@ class Binary(Operator):
|
|
|
404
451
|
|
|
405
452
|
:return: result ScalarType or exception
|
|
406
453
|
"""
|
|
454
|
+
|
|
407
455
|
return binary_implicit_promotion(left_type, right_type, cls.type_to_check, cls.return_type)
|
|
408
456
|
|
|
409
457
|
# The following class method checks the type promotion
|
|
410
458
|
@classmethod
|
|
411
|
-
def validate_type_compatibility(cls, left:
|
|
459
|
+
def validate_type_compatibility(cls, left: Any, right: Any) -> bool:
|
|
412
460
|
"""
|
|
413
461
|
Validates the compatibility between the types of the operands and the operator
|
|
414
462
|
(implicit type promotion : check_binary_implicit_type_promotion)
|
|
@@ -418,17 +466,19 @@ class Binary(Operator):
|
|
|
418
466
|
|
|
419
467
|
:return: True if the types are compatible, False otherwise
|
|
420
468
|
"""
|
|
469
|
+
|
|
421
470
|
return check_binary_implicit_promotion(left, right, cls.type_to_check, cls.return_type)
|
|
422
471
|
|
|
423
472
|
@classmethod
|
|
424
473
|
def apply_return_type_dataset(
|
|
425
|
-
|
|
426
|
-
right_operand: Union[Dataset, Scalar, ScalarSet]
|
|
474
|
+
cls, result_dataset: Dataset, left_operand: Any, right_operand: Any
|
|
427
475
|
) -> None:
|
|
428
476
|
"""
|
|
429
477
|
Used in dataset's validation.
|
|
430
|
-
Changes the result dataset and give us his final form
|
|
478
|
+
Changes the result dataset and give us his final form
|
|
479
|
+
(#TODO: write this explanation in a better way)
|
|
431
480
|
"""
|
|
481
|
+
|
|
432
482
|
changed_allowed = cls.op in MONOMEASURE_CHANGED_ALLOWED
|
|
433
483
|
is_mono_measure = len(result_dataset.get_measures()) == 1
|
|
434
484
|
for measure in result_dataset.get_measures():
|
|
@@ -444,22 +494,24 @@ class Binary(Operator):
|
|
|
444
494
|
name=COMP_NAME_MAPPING[result_data_type],
|
|
445
495
|
data_type=result_data_type,
|
|
446
496
|
role=Role.MEASURE,
|
|
447
|
-
nullable=measure.nullable
|
|
497
|
+
nullable=measure.nullable,
|
|
448
498
|
)
|
|
449
499
|
result_dataset.delete_component(measure.name)
|
|
450
500
|
result_dataset.add_component(component)
|
|
451
501
|
if result_dataset.data is not None:
|
|
452
502
|
result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
|
|
453
|
-
elif (
|
|
454
|
-
|
|
455
|
-
|
|
503
|
+
elif (
|
|
504
|
+
changed_allowed is False
|
|
505
|
+
and is_mono_measure is False
|
|
506
|
+
and left_type.promotion_changed_type(result_data_type)
|
|
456
507
|
):
|
|
457
508
|
raise SemanticError("1-1-1-4", op=cls.op)
|
|
458
509
|
else:
|
|
459
510
|
measure.data_type = result_data_type
|
|
460
511
|
|
|
461
512
|
@classmethod
|
|
462
|
-
def dataset_evaluation(cls, left_operand: Dataset, right_operand: Dataset):
|
|
513
|
+
def dataset_evaluation(cls, left_operand: Dataset, right_operand: Dataset) -> Dataset:
|
|
514
|
+
|
|
463
515
|
result_dataset = cls.dataset_validation(left_operand, right_operand)
|
|
464
516
|
|
|
465
517
|
use_right_as_base = False
|
|
@@ -471,49 +523,65 @@ class Binary(Operator):
|
|
|
471
523
|
base_operand_data = left_operand.data
|
|
472
524
|
other_operand_data = right_operand.data
|
|
473
525
|
|
|
474
|
-
join_keys = list(
|
|
475
|
-
|
|
526
|
+
join_keys = list(
|
|
527
|
+
set(left_operand.get_identifiers_names()).intersection(
|
|
528
|
+
right_operand.get_identifiers_names()
|
|
529
|
+
)
|
|
530
|
+
)
|
|
476
531
|
|
|
477
532
|
for join_key in join_keys:
|
|
478
|
-
_id_type_promotion_join_keys(
|
|
479
|
-
|
|
480
|
-
|
|
533
|
+
_id_type_promotion_join_keys(
|
|
534
|
+
left_operand.get_component(join_key),
|
|
535
|
+
right_operand.get_component(join_key),
|
|
536
|
+
join_key,
|
|
537
|
+
base_operand_data,
|
|
538
|
+
other_operand_data,
|
|
539
|
+
)
|
|
481
540
|
|
|
482
541
|
try:
|
|
483
542
|
# Merge the data
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
543
|
+
if base_operand_data is None or other_operand_data is None:
|
|
544
|
+
result_data: pd.DataFrame = pd.DataFrame()
|
|
545
|
+
else:
|
|
546
|
+
result_data = pd.merge(
|
|
547
|
+
base_operand_data,
|
|
548
|
+
other_operand_data,
|
|
549
|
+
how="inner",
|
|
550
|
+
on=join_keys,
|
|
551
|
+
suffixes=("_x", "_y"),
|
|
552
|
+
)
|
|
488
553
|
except ValueError as e:
|
|
489
554
|
raise Exception(f"Error merging datasets on Binary Operator: {str(e)}")
|
|
490
555
|
|
|
491
556
|
# Measures are the same, using left operand measures names
|
|
492
557
|
for measure in left_operand.get_measures():
|
|
493
|
-
result_data[measure.name +
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
558
|
+
result_data[measure.name + "_x"] = cls.cast_time_types(
|
|
559
|
+
measure.data_type, result_data[measure.name + "_x"]
|
|
560
|
+
)
|
|
561
|
+
result_data[measure.name + "_y"] = cls.cast_time_types(
|
|
562
|
+
measure.data_type, result_data[measure.name + "_y"]
|
|
563
|
+
)
|
|
497
564
|
if use_right_as_base:
|
|
498
565
|
result_data[measure.name] = cls.apply_operation_two_series(
|
|
499
|
-
result_data[measure.name +
|
|
500
|
-
|
|
566
|
+
result_data[measure.name + "_y"], result_data[measure.name + "_x"]
|
|
567
|
+
)
|
|
501
568
|
else:
|
|
502
569
|
result_data[measure.name] = cls.apply_operation_two_series(
|
|
503
|
-
result_data[measure.name +
|
|
504
|
-
|
|
505
|
-
result_data = result_data.drop([measure.name +
|
|
570
|
+
result_data[measure.name + "_x"], result_data[measure.name + "_y"]
|
|
571
|
+
)
|
|
572
|
+
result_data = result_data.drop([measure.name + "_x", measure.name + "_y"], axis=1)
|
|
506
573
|
|
|
507
574
|
# Delete attributes from the result data
|
|
508
575
|
attributes = list(
|
|
509
|
-
set(left_operand.get_attributes_names()).union(right_operand.get_attributes_names())
|
|
576
|
+
set(left_operand.get_attributes_names()).union(right_operand.get_attributes_names())
|
|
577
|
+
)
|
|
510
578
|
for att in attributes:
|
|
511
579
|
if att in result_data.columns:
|
|
512
580
|
result_data = result_data.drop(att, axis=1)
|
|
513
|
-
if att +
|
|
514
|
-
result_data = result_data.drop(att +
|
|
515
|
-
if att +
|
|
516
|
-
result_data = result_data.drop(att +
|
|
581
|
+
if att + "_x" in result_data.columns:
|
|
582
|
+
result_data = result_data.drop(att + "_x", axis=1)
|
|
583
|
+
if att + "_y" in result_data.columns:
|
|
584
|
+
result_data = result_data.drop(att + "_y", axis=1)
|
|
517
585
|
|
|
518
586
|
result_dataset.data = result_data
|
|
519
587
|
cls.modify_measure_column(result_dataset)
|
|
@@ -522,25 +590,31 @@ class Binary(Operator):
|
|
|
522
590
|
|
|
523
591
|
@classmethod
|
|
524
592
|
def scalar_evaluation(cls, left_operand: Scalar, right_operand: Scalar) -> Scalar:
|
|
593
|
+
|
|
525
594
|
result_scalar = cls.scalar_validation(left_operand, right_operand)
|
|
526
595
|
result_scalar.value = cls.op_func(left_operand.value, right_operand.value)
|
|
527
596
|
return result_scalar
|
|
528
597
|
|
|
529
598
|
@classmethod
|
|
530
|
-
def dataset_scalar_evaluation(
|
|
531
|
-
|
|
599
|
+
def dataset_scalar_evaluation(
|
|
600
|
+
cls, dataset: Dataset, scalar: Scalar, dataset_left: bool = True
|
|
601
|
+
) -> Dataset:
|
|
602
|
+
|
|
532
603
|
result_dataset = cls.dataset_scalar_validation(dataset, scalar)
|
|
533
|
-
result_data = dataset.data.copy()
|
|
604
|
+
result_data = dataset.data.copy() if dataset.data is not None else pd.DataFrame()
|
|
534
605
|
result_dataset.data = result_data
|
|
535
606
|
|
|
536
607
|
scalar_value = cls.cast_time_types_scalar(scalar.data_type, scalar.value)
|
|
537
608
|
|
|
538
609
|
for measure in dataset.get_measures():
|
|
539
610
|
measure_data = cls.cast_time_types(measure.data_type, result_data[measure.name].copy())
|
|
540
|
-
if measure.data_type.__name__ == "Duration" and not isinstance(
|
|
611
|
+
if measure.data_type.__name__.__str__() == "Duration" and not isinstance(
|
|
612
|
+
scalar_value, int
|
|
613
|
+
):
|
|
541
614
|
scalar_value = DURATION_MAPPING[scalar_value]
|
|
542
615
|
result_dataset.data[measure.name] = cls.apply_operation_series_scalar(
|
|
543
|
-
measure_data, scalar_value, dataset_left
|
|
616
|
+
measure_data, scalar_value, dataset_left
|
|
617
|
+
)
|
|
544
618
|
|
|
545
619
|
result_dataset.data = result_data
|
|
546
620
|
cols_to_keep = dataset.get_identifiers_names() + dataset.get_measures_names()
|
|
@@ -549,34 +623,53 @@ class Binary(Operator):
|
|
|
549
623
|
return result_dataset
|
|
550
624
|
|
|
551
625
|
@classmethod
|
|
552
|
-
def component_evaluation(
|
|
553
|
-
|
|
626
|
+
def component_evaluation(
|
|
627
|
+
cls, left_operand: DataComponent, right_operand: DataComponent
|
|
628
|
+
) -> DataComponent:
|
|
629
|
+
|
|
554
630
|
result_component = cls.component_validation(left_operand, right_operand)
|
|
555
|
-
left_data = cls.cast_time_types(
|
|
556
|
-
|
|
631
|
+
left_data = cls.cast_time_types(
|
|
632
|
+
left_operand.data_type,
|
|
633
|
+
left_operand.data.copy() if left_operand.data is not None else pd.Series(),
|
|
634
|
+
)
|
|
635
|
+
right_data = cls.cast_time_types(
|
|
636
|
+
right_operand.data_type,
|
|
637
|
+
right_operand.data.copy() if right_operand.data is not None else pd.Series(),
|
|
638
|
+
)
|
|
557
639
|
result_component.data = cls.apply_operation_two_series(left_data, right_data)
|
|
558
640
|
return result_component
|
|
559
641
|
|
|
560
642
|
@classmethod
|
|
561
|
-
def component_scalar_evaluation(
|
|
562
|
-
|
|
643
|
+
def component_scalar_evaluation(
|
|
644
|
+
cls, component: DataComponent, scalar: Scalar, component_left: bool = True
|
|
645
|
+
) -> DataComponent:
|
|
646
|
+
|
|
563
647
|
result_component = cls.component_scalar_validation(component, scalar)
|
|
564
|
-
comp_data = cls.cast_time_types(
|
|
648
|
+
comp_data = cls.cast_time_types(
|
|
649
|
+
component.data_type,
|
|
650
|
+
component.data.copy() if component.data is not None else pd.Series(),
|
|
651
|
+
)
|
|
565
652
|
scalar_value = cls.cast_time_types_scalar(scalar.data_type, scalar.value)
|
|
566
|
-
if component.data_type.__name__ == "Duration" and not isinstance(
|
|
653
|
+
if component.data_type.__name__.__str__() == "Duration" and not isinstance(
|
|
654
|
+
scalar_value, int
|
|
655
|
+
):
|
|
567
656
|
scalar_value = DURATION_MAPPING[scalar_value]
|
|
568
|
-
result_component.data = cls.apply_operation_series_scalar(
|
|
569
|
-
|
|
657
|
+
result_component.data = cls.apply_operation_series_scalar(
|
|
658
|
+
comp_data, scalar_value, component_left
|
|
659
|
+
)
|
|
570
660
|
return result_component
|
|
571
661
|
|
|
572
662
|
@classmethod
|
|
573
663
|
def dataset_set_evaluation(cls, dataset: Dataset, scalar_set: ScalarSet) -> Dataset:
|
|
664
|
+
|
|
574
665
|
result_dataset = cls.dataset_set_validation(dataset, scalar_set)
|
|
575
|
-
result_data = dataset.data.copy()
|
|
666
|
+
result_data = dataset.data.copy() if dataset.data is not None else pd.DataFrame()
|
|
576
667
|
|
|
577
668
|
for measure_name in dataset.get_measures_names():
|
|
578
|
-
|
|
579
|
-
|
|
669
|
+
if dataset.data is not None:
|
|
670
|
+
result_data[measure_name] = cls.apply_operation_two_series(
|
|
671
|
+
dataset.data[measure_name], scalar_set
|
|
672
|
+
)
|
|
580
673
|
|
|
581
674
|
cols_to_keep = dataset.get_identifiers_names() + dataset.get_measures_names()
|
|
582
675
|
result_dataset.data = result_data[cols_to_keep]
|
|
@@ -585,28 +678,32 @@ class Binary(Operator):
|
|
|
585
678
|
return result_dataset
|
|
586
679
|
|
|
587
680
|
@classmethod
|
|
588
|
-
def component_set_evaluation(
|
|
589
|
-
|
|
681
|
+
def component_set_evaluation(
|
|
682
|
+
cls, component: DataComponent, scalar_set: ScalarSet
|
|
683
|
+
) -> DataComponent:
|
|
684
|
+
|
|
590
685
|
result_component = cls.component_set_validation(component, scalar_set)
|
|
591
|
-
result_component.data = cls.apply_operation_two_series(
|
|
592
|
-
|
|
686
|
+
result_component.data = cls.apply_operation_two_series(
|
|
687
|
+
component.data.copy() if component.data is not None else pd.Series(), scalar_set
|
|
688
|
+
)
|
|
593
689
|
return result_component
|
|
594
690
|
|
|
595
691
|
@classmethod
|
|
596
692
|
def scalar_set_evaluation(cls, scalar: Scalar, scalar_set: ScalarSet) -> Scalar:
|
|
693
|
+
|
|
597
694
|
result_scalar = cls.scalar_set_validation(scalar, scalar_set)
|
|
598
695
|
result_scalar.value = cls.op_func(scalar.value, scalar_set)
|
|
599
696
|
return result_scalar
|
|
600
697
|
|
|
601
698
|
@classmethod
|
|
602
|
-
def evaluate(cls, left_operand:
|
|
603
|
-
right_operand: ALL_MODEL_DATA_TYPES) -> ALL_MODEL_DATA_TYPES:
|
|
699
|
+
def evaluate(cls, left_operand: Any, right_operand: Any) -> Any:
|
|
604
700
|
"""
|
|
605
701
|
Evaluate the operation (based on validation output)
|
|
606
702
|
:param left_operand: The left operand
|
|
607
703
|
:param right_operand: The right operand
|
|
608
704
|
:return: The result of the operation
|
|
609
705
|
"""
|
|
706
|
+
|
|
610
707
|
if isinstance(left_operand, Dataset) and isinstance(right_operand, Dataset):
|
|
611
708
|
return cls.dataset_evaluation(left_operand, right_operand)
|
|
612
709
|
if isinstance(left_operand, Scalar) and isinstance(right_operand, Scalar):
|
|
@@ -615,23 +712,18 @@ class Binary(Operator):
|
|
|
615
712
|
return cls.dataset_scalar_evaluation(left_operand, right_operand, dataset_left=True)
|
|
616
713
|
if isinstance(left_operand, Scalar) and isinstance(right_operand, Dataset):
|
|
617
714
|
return cls.dataset_scalar_evaluation(right_operand, left_operand, dataset_left=False)
|
|
618
|
-
|
|
619
715
|
if isinstance(left_operand, DataComponent) and isinstance(right_operand, DataComponent):
|
|
620
716
|
return cls.component_evaluation(left_operand, right_operand)
|
|
621
|
-
|
|
622
717
|
if isinstance(left_operand, DataComponent) and isinstance(right_operand, Scalar):
|
|
623
718
|
return cls.component_scalar_evaluation(left_operand, right_operand, component_left=True)
|
|
624
|
-
|
|
625
719
|
if isinstance(left_operand, Scalar) and isinstance(right_operand, DataComponent):
|
|
626
|
-
return cls.component_scalar_evaluation(
|
|
627
|
-
|
|
628
|
-
|
|
720
|
+
return cls.component_scalar_evaluation(
|
|
721
|
+
right_operand, left_operand, component_left=False
|
|
722
|
+
)
|
|
629
723
|
if isinstance(left_operand, Dataset) and isinstance(right_operand, ScalarSet):
|
|
630
724
|
return cls.dataset_set_evaluation(left_operand, right_operand)
|
|
631
|
-
|
|
632
725
|
if isinstance(left_operand, DataComponent) and isinstance(right_operand, ScalarSet):
|
|
633
726
|
return cls.component_set_evaluation(left_operand, right_operand)
|
|
634
|
-
|
|
635
727
|
if isinstance(left_operand, Scalar) and isinstance(right_operand, ScalarSet):
|
|
636
728
|
return cls.scalar_set_evaluation(left_operand, right_operand)
|
|
637
729
|
|
|
@@ -639,21 +731,27 @@ class Binary(Operator):
|
|
|
639
731
|
class Unary(Operator):
|
|
640
732
|
|
|
641
733
|
@classmethod
|
|
642
|
-
def op_func(cls,
|
|
734
|
+
def op_func(cls, *args: Any) -> Any:
|
|
735
|
+
x = args[0]
|
|
736
|
+
|
|
643
737
|
return None if pd.isnull(x) else cls.py_op(x)
|
|
644
738
|
|
|
645
739
|
@classmethod
|
|
646
740
|
def apply_operation_component(cls, series: Any) -> Any:
|
|
647
|
-
"""
|
|
648
|
-
|
|
741
|
+
"""
|
|
742
|
+
Applies the operation to a component
|
|
743
|
+
"""
|
|
744
|
+
|
|
745
|
+
return series.map(cls.py_op, na_action="ignore")
|
|
649
746
|
|
|
650
747
|
@classmethod
|
|
651
|
-
def validate(cls, operand:
|
|
748
|
+
def validate(cls, operand: Any) -> Any:
|
|
652
749
|
"""
|
|
653
750
|
The main function for validate, applies the implicit promotion (or check it), and
|
|
654
751
|
can do a semantic check too.
|
|
655
752
|
Returns an operand.
|
|
656
753
|
"""
|
|
754
|
+
|
|
657
755
|
if isinstance(operand, Dataset):
|
|
658
756
|
return cls.dataset_validation(operand)
|
|
659
757
|
elif isinstance(operand, DataComponent):
|
|
@@ -663,12 +761,15 @@ class Unary(Operator):
|
|
|
663
761
|
|
|
664
762
|
@classmethod
|
|
665
763
|
def dataset_validation(cls, operand: Dataset) -> Dataset:
|
|
764
|
+
|
|
666
765
|
cls.validate_dataset_type(operand)
|
|
667
766
|
if len(operand.get_measures()) == 0:
|
|
668
767
|
raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
|
|
669
|
-
result_components = {
|
|
670
|
-
|
|
671
|
-
|
|
768
|
+
result_components = {
|
|
769
|
+
comp_name: copy(comp)
|
|
770
|
+
for comp_name, comp in operand.components.items()
|
|
771
|
+
if comp.role in [Role.IDENTIFIER, Role.MEASURE]
|
|
772
|
+
}
|
|
672
773
|
|
|
673
774
|
result_dataset = Dataset(name="result", components=result_components, data=None)
|
|
674
775
|
cls.apply_return_type_dataset(result_dataset, operand)
|
|
@@ -676,46 +777,64 @@ class Unary(Operator):
|
|
|
676
777
|
|
|
677
778
|
@classmethod
|
|
678
779
|
def scalar_validation(cls, operand: Scalar) -> Scalar:
|
|
780
|
+
|
|
679
781
|
result_type = cls.type_validation(operand.data_type)
|
|
680
782
|
result = Scalar(name="result", data_type=result_type, value=None)
|
|
681
783
|
return result
|
|
682
784
|
|
|
683
785
|
@classmethod
|
|
684
786
|
def component_validation(cls, operand: DataComponent) -> DataComponent:
|
|
787
|
+
|
|
685
788
|
result_type = cls.type_validation(operand.data_type)
|
|
686
|
-
result = DataComponent(
|
|
687
|
-
|
|
789
|
+
result = DataComponent(
|
|
790
|
+
name="result",
|
|
791
|
+
data_type=result_type,
|
|
792
|
+
data=None,
|
|
793
|
+
role=operand.role,
|
|
794
|
+
nullable=operand.nullable,
|
|
795
|
+
)
|
|
688
796
|
return result
|
|
689
797
|
|
|
690
798
|
# The following class method implements the type promotion
|
|
691
799
|
@classmethod
|
|
692
|
-
def type_validation(cls, operand:
|
|
800
|
+
def type_validation(cls, operand: Any) -> Any:
|
|
801
|
+
|
|
693
802
|
return unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
|
|
694
803
|
|
|
695
804
|
# The following class method checks the type promotion
|
|
696
805
|
@classmethod
|
|
697
|
-
def validate_type_compatibility(cls, operand:
|
|
806
|
+
def validate_type_compatibility(cls, operand: Any) -> bool:
|
|
807
|
+
|
|
698
808
|
return check_unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
|
|
699
809
|
|
|
700
810
|
@classmethod
|
|
701
811
|
def validate_dataset_type(cls, dataset: Dataset) -> None:
|
|
812
|
+
|
|
702
813
|
if cls.type_to_check is not None:
|
|
703
814
|
for measure in dataset.get_measures():
|
|
704
815
|
if not cls.validate_type_compatibility(measure.data_type):
|
|
705
|
-
raise SemanticError(
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
816
|
+
raise SemanticError(
|
|
817
|
+
"1-1-1-3",
|
|
818
|
+
op=cls.op,
|
|
819
|
+
entity=measure.role.value,
|
|
820
|
+
name=measure.name,
|
|
821
|
+
target_type=SCALAR_TYPES_CLASS_REVERSE[cls.type_to_check],
|
|
822
|
+
)
|
|
709
823
|
|
|
710
824
|
@classmethod
|
|
711
825
|
def validate_scalar_type(cls, scalar: Scalar) -> None:
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
raise SemanticError(
|
|
715
|
-
|
|
826
|
+
|
|
827
|
+
if cls.type_to_check is not None and not cls.validate_type_compatibility(scalar.data_type):
|
|
828
|
+
raise SemanticError(
|
|
829
|
+
"1-1-1-5",
|
|
830
|
+
op=cls.op,
|
|
831
|
+
name=scalar.name,
|
|
832
|
+
type=SCALAR_TYPES_CLASS_REVERSE[scalar.data_type],
|
|
833
|
+
)
|
|
716
834
|
|
|
717
835
|
@classmethod
|
|
718
836
|
def apply_return_type_dataset(cls, result_dataset: Dataset, operand: Dataset) -> None:
|
|
837
|
+
|
|
719
838
|
changed_allowed = cls.op in MONOMEASURE_CHANGED_ALLOWED
|
|
720
839
|
is_mono_measure = len(operand.get_measures()) == 1
|
|
721
840
|
for measure in result_dataset.get_measures():
|
|
@@ -727,20 +846,23 @@ class Unary(Operator):
|
|
|
727
846
|
name=COMP_NAME_MAPPING[result_data_type],
|
|
728
847
|
data_type=result_data_type,
|
|
729
848
|
role=Role.MEASURE,
|
|
730
|
-
nullable=measure.nullable
|
|
849
|
+
nullable=measure.nullable,
|
|
731
850
|
)
|
|
732
851
|
result_dataset.delete_component(measure.name)
|
|
733
852
|
result_dataset.add_component(component)
|
|
734
853
|
if result_dataset.data is not None:
|
|
735
854
|
result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
|
|
736
|
-
elif
|
|
737
|
-
|
|
855
|
+
elif (
|
|
856
|
+
changed_allowed is False
|
|
857
|
+
and is_mono_measure is False
|
|
858
|
+
and operand_type.promotion_changed_type(result_data_type)
|
|
859
|
+
):
|
|
738
860
|
raise SemanticError("1-1-1-4", op=cls.op)
|
|
739
861
|
else:
|
|
740
862
|
measure.data_type = result_data_type
|
|
741
863
|
|
|
742
864
|
@classmethod
|
|
743
|
-
def evaluate(cls, operand: ALL_MODEL_DATA_TYPES) ->
|
|
865
|
+
def evaluate(cls, operand: ALL_MODEL_DATA_TYPES) -> Any:
|
|
744
866
|
|
|
745
867
|
if isinstance(operand, Dataset):
|
|
746
868
|
return cls.dataset_evaluation(operand)
|
|
@@ -751,8 +873,9 @@ class Unary(Operator):
|
|
|
751
873
|
|
|
752
874
|
@classmethod
|
|
753
875
|
def dataset_evaluation(cls, operand: Dataset) -> Dataset:
|
|
876
|
+
|
|
754
877
|
result_dataset = cls.dataset_validation(operand)
|
|
755
|
-
result_data = operand.data.copy()
|
|
878
|
+
result_data = operand.data.copy() if operand.data is not None else pd.DataFrame()
|
|
756
879
|
for measure_name in operand.get_measures_names():
|
|
757
880
|
result_data[measure_name] = cls.apply_operation_component(result_data[measure_name])
|
|
758
881
|
|
|
@@ -765,12 +888,16 @@ class Unary(Operator):
|
|
|
765
888
|
|
|
766
889
|
@classmethod
|
|
767
890
|
def scalar_evaluation(cls, operand: Scalar) -> Scalar:
|
|
891
|
+
|
|
768
892
|
result_scalar = cls.scalar_validation(operand)
|
|
769
893
|
result_scalar.value = cls.op_func(operand.value)
|
|
770
894
|
return result_scalar
|
|
771
895
|
|
|
772
896
|
@classmethod
|
|
773
897
|
def component_evaluation(cls, operand: DataComponent) -> DataComponent:
|
|
898
|
+
|
|
774
899
|
result_component = cls.component_validation(operand)
|
|
775
|
-
result_component.data = cls.apply_operation_component(
|
|
900
|
+
result_component.data = cls.apply_operation_component(
|
|
901
|
+
operand.data.copy() if operand.data is not None else pd.Series()
|
|
902
|
+
)
|
|
776
903
|
return result_component
|