vtlengine 1.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +159 -102
- vtlengine/API/__init__.py +110 -68
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +402 -205
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +248 -104
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +24 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/Vtl.g4 +49 -20
- vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
- vtlengine/AST/Grammar/lexer.py +2012 -1312
- vtlengine/AST/Grammar/parser.py +7524 -4343
- vtlengine/AST/Grammar/tokens.py +140 -128
- vtlengine/AST/VtlVisitor.py +16 -5
- vtlengine/AST/__init__.py +41 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +196 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +96 -27
- vtlengine/Exceptions/messages.py +149 -69
- vtlengine/Interpreter/__init__.py +817 -497
- vtlengine/Model/__init__.py +172 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +167 -79
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +290 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +129 -46
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +467 -215
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +232 -41
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +79 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +48 -37
- vtlengine-1.0.2.dist-info/METADATA +245 -0
- vtlengine-1.0.2.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/LICENSE.md +0 -0
|
@@ -1,31 +1,37 @@
|
|
|
1
|
-
import os
|
|
2
1
|
from copy import copy
|
|
3
|
-
from typing import Union
|
|
2
|
+
from typing import Union, Any, List
|
|
4
3
|
|
|
5
4
|
import numpy as np
|
|
6
5
|
|
|
7
|
-
from vtlengine.DataTypes import
|
|
8
|
-
|
|
6
|
+
from vtlengine.DataTypes import (
|
|
7
|
+
Boolean,
|
|
8
|
+
COMP_NAME_MAPPING,
|
|
9
|
+
binary_implicit_promotion,
|
|
10
|
+
SCALAR_TYPES_CLASS_REVERSE,
|
|
11
|
+
Null,
|
|
12
|
+
)
|
|
9
13
|
from vtlengine.Operators import Operator, Binary
|
|
10
14
|
|
|
11
15
|
from vtlengine.Exceptions import SemanticError
|
|
12
16
|
from vtlengine.Model import Scalar, DataComponent, Dataset, Role
|
|
13
17
|
|
|
14
|
-
if os.environ.get("SPARK", False):
|
|
15
|
-
|
|
16
|
-
else:
|
|
17
|
-
|
|
18
|
+
# if os.environ.get("SPARK", False):
|
|
19
|
+
# import pyspark.pandas as pd
|
|
20
|
+
# else:
|
|
21
|
+
# import pandas as pd
|
|
22
|
+
import pandas as pd
|
|
18
23
|
|
|
19
24
|
|
|
20
25
|
class If(Operator):
|
|
21
26
|
"""
|
|
22
27
|
If class:
|
|
23
|
-
`If-then-else <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=225&zoom=100,72,142>`_ operator
|
|
28
|
+
`If-then-else <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=225&zoom=100,72,142>`_ operator # noqa E501
|
|
24
29
|
inherits from Operator, a superclass that contains general validate and evaluate class methods.
|
|
25
30
|
It has the following class methods:
|
|
26
31
|
Class methods:
|
|
27
|
-
evaluate: Evaluates if the operation is well constructed, checking the actual condition and
|
|
28
|
-
|
|
32
|
+
evaluate: Evaluates if the operation is well constructed, checking the actual condition and
|
|
33
|
+
dropping a boolean result.
|
|
34
|
+
The result will depend on the data class, such as datacomponent and dataset.
|
|
29
35
|
|
|
30
36
|
component_level_evaluation: Returns a pandas dataframe with data to set the condition
|
|
31
37
|
|
|
@@ -37,79 +43,97 @@ class If(Operator):
|
|
|
37
43
|
"""
|
|
38
44
|
|
|
39
45
|
@classmethod
|
|
40
|
-
def evaluate(cls, condition, true_branch, false_branch):
|
|
46
|
+
def evaluate(cls, condition: Any, true_branch: Any, false_branch: Any) -> Any:
|
|
41
47
|
result = cls.validate(condition, true_branch, false_branch)
|
|
42
|
-
if isinstance(
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
48
|
+
if not isinstance(result, Scalar):
|
|
49
|
+
if isinstance(condition, DataComponent):
|
|
50
|
+
result.data = cls.component_level_evaluation(condition, true_branch, false_branch)
|
|
51
|
+
if isinstance(condition, Dataset):
|
|
52
|
+
result = cls.dataset_level_evaluation(result, condition, true_branch, false_branch)
|
|
46
53
|
return result
|
|
47
54
|
|
|
48
55
|
@classmethod
|
|
49
|
-
def component_level_evaluation(
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
56
|
+
def component_level_evaluation(
|
|
57
|
+
cls, condition: DataComponent, true_branch: Any, false_branch: Any
|
|
58
|
+
) -> Any:
|
|
59
|
+
result = None
|
|
60
|
+
if condition.data is not None:
|
|
61
|
+
if isinstance(true_branch, Scalar):
|
|
62
|
+
true_data = pd.Series(true_branch.value, index=condition.data.index)
|
|
63
|
+
else:
|
|
64
|
+
true_data = true_branch.data.reindex(condition.data.index)
|
|
65
|
+
if isinstance(false_branch, Scalar):
|
|
66
|
+
false_data = pd.Series(false_branch.value, index=condition.data.index)
|
|
67
|
+
else:
|
|
68
|
+
false_data = false_branch.data.reindex(condition.data.index)
|
|
69
|
+
result = np.where(condition.data, true_data, false_data)
|
|
62
70
|
return pd.Series(result, index=condition.data.index)
|
|
63
71
|
|
|
64
72
|
@classmethod
|
|
65
|
-
def dataset_level_evaluation(
|
|
73
|
+
def dataset_level_evaluation(
|
|
74
|
+
cls, result: Any, condition: Any, true_branch: Any, false_branch: Any
|
|
75
|
+
) -> Dataset:
|
|
66
76
|
ids = condition.get_identifiers_names()
|
|
67
77
|
condition_measure = condition.get_measures_names()[0]
|
|
68
78
|
true_data = condition.data[condition.data[condition_measure] == True]
|
|
69
79
|
false_data = condition.data[condition.data[condition_measure] != True].fillna(False)
|
|
70
80
|
|
|
71
81
|
if isinstance(true_branch, Dataset):
|
|
72
|
-
if len(true_data) > 0:
|
|
73
|
-
true_data = pd.merge(
|
|
74
|
-
|
|
82
|
+
if len(true_data) > 0 and true_branch.data is not None:
|
|
83
|
+
true_data = pd.merge(
|
|
84
|
+
true_data, true_branch.data, on=ids, how="right", suffixes=("_condition", "")
|
|
85
|
+
)
|
|
75
86
|
else:
|
|
76
87
|
true_data = pd.DataFrame(columns=true_branch.get_components_names())
|
|
77
88
|
else:
|
|
78
89
|
true_data[condition_measure] = true_data[condition_measure].apply(
|
|
79
|
-
lambda x: true_branch.value
|
|
90
|
+
lambda x: true_branch.value
|
|
91
|
+
)
|
|
80
92
|
if isinstance(false_branch, Dataset):
|
|
81
|
-
if len(false_data) > 0:
|
|
82
|
-
false_data = pd.merge(
|
|
83
|
-
|
|
93
|
+
if len(false_data) > 0 and false_branch.data is not None:
|
|
94
|
+
false_data = pd.merge(
|
|
95
|
+
false_data, false_branch.data, on=ids, how="right", suffixes=("_condition", "")
|
|
96
|
+
)
|
|
84
97
|
else:
|
|
85
98
|
false_data = pd.DataFrame(columns=false_branch.get_components_names())
|
|
86
99
|
else:
|
|
87
100
|
false_data[condition_measure] = false_data[condition_measure].apply(
|
|
88
|
-
lambda x: false_branch.value
|
|
101
|
+
lambda x: false_branch.value
|
|
102
|
+
)
|
|
89
103
|
|
|
90
|
-
result.data =
|
|
91
|
-
|
|
104
|
+
result.data = (
|
|
105
|
+
pd.concat([true_data, false_data], ignore_index=True)
|
|
106
|
+
.drop_duplicates()
|
|
107
|
+
.sort_values(by=ids)
|
|
108
|
+
)
|
|
92
109
|
if isinstance(result, Dataset):
|
|
93
|
-
drop_columns = [
|
|
94
|
-
|
|
110
|
+
drop_columns = [
|
|
111
|
+
column for column in result.data.columns if column not in result.components.keys()
|
|
112
|
+
]
|
|
95
113
|
result.data = result.data.dropna(subset=drop_columns).drop(columns=drop_columns)
|
|
96
114
|
if isinstance(true_branch, Scalar) and isinstance(false_branch, Scalar):
|
|
97
115
|
result.get_measures()[0].data_type = true_branch.data_type
|
|
98
116
|
result.get_measures()[0].name = COMP_NAME_MAPPING[true_branch.data_type]
|
|
99
|
-
result.data
|
|
100
|
-
|
|
117
|
+
if result.data is not None:
|
|
118
|
+
result.data = result.data.rename(
|
|
119
|
+
columns={condition_measure: result.get_measures()[0].name}
|
|
120
|
+
)
|
|
101
121
|
return result
|
|
102
122
|
|
|
103
123
|
@classmethod
|
|
104
|
-
def validate(
|
|
124
|
+
def validate( # noqa: C901
|
|
125
|
+
cls, condition: Any, true_branch: Any, false_branch: Any
|
|
126
|
+
) -> Union[Scalar, DataComponent, Dataset]:
|
|
105
127
|
nullable = False
|
|
106
128
|
left = true_branch
|
|
107
129
|
right = false_branch
|
|
108
130
|
if true_branch.__class__ != false_branch.__class__:
|
|
109
|
-
if (isinstance(true_branch, DataComponent) and isinstance(false_branch, Dataset)) or
|
|
110
|
-
|
|
131
|
+
if (isinstance(true_branch, DataComponent) and isinstance(false_branch, Dataset)) or (
|
|
132
|
+
isinstance(true_branch, Dataset) and isinstance(false_branch, DataComponent)
|
|
133
|
+
):
|
|
111
134
|
raise ValueError(
|
|
112
|
-
"If then and else operands cannot be dataset and component respectively"
|
|
135
|
+
"If then and else operands cannot be dataset and component respectively"
|
|
136
|
+
)
|
|
113
137
|
if isinstance(true_branch, Scalar):
|
|
114
138
|
left = false_branch
|
|
115
139
|
right = true_branch
|
|
@@ -117,42 +141,48 @@ class If(Operator):
|
|
|
117
141
|
# Datacomponent
|
|
118
142
|
if isinstance(condition, DataComponent):
|
|
119
143
|
if not condition.data_type == Boolean:
|
|
120
|
-
raise SemanticError(
|
|
121
|
-
|
|
144
|
+
raise SemanticError(
|
|
145
|
+
"1-1-9-11", op=cls.op, type=SCALAR_TYPES_CLASS_REVERSE[condition.data_type]
|
|
146
|
+
)
|
|
122
147
|
if not isinstance(left, Scalar) or not isinstance(right, Scalar):
|
|
123
148
|
nullable = condition.nullable
|
|
124
149
|
else:
|
|
125
|
-
if
|
|
126
|
-
nullable = True
|
|
127
|
-
if isinstance(right, Scalar) and right.data_type == Null:
|
|
150
|
+
if left.data_type == Null or right.data_type == Null:
|
|
128
151
|
nullable = True
|
|
129
152
|
if isinstance(left, DataComponent):
|
|
130
153
|
nullable |= left.nullable
|
|
131
154
|
if isinstance(right, DataComponent):
|
|
132
155
|
nullable |= right.nullable
|
|
133
|
-
return DataComponent(
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
156
|
+
return DataComponent(
|
|
157
|
+
name="result",
|
|
158
|
+
data=None,
|
|
159
|
+
data_type=binary_implicit_promotion(left.data_type, right.data_type),
|
|
160
|
+
role=Role.MEASURE,
|
|
161
|
+
nullable=nullable,
|
|
162
|
+
)
|
|
137
163
|
|
|
138
164
|
# Dataset
|
|
139
165
|
if isinstance(left, Scalar) and isinstance(right, Scalar):
|
|
140
|
-
raise SemanticError(
|
|
141
|
-
|
|
166
|
+
raise SemanticError(
|
|
167
|
+
"1-1-9-12", op=cls.op, then_symbol=left.name, else_symbol=right.name
|
|
168
|
+
)
|
|
142
169
|
if isinstance(left, DataComponent):
|
|
143
|
-
raise SemanticError(
|
|
144
|
-
|
|
170
|
+
raise SemanticError(
|
|
171
|
+
"1-1-9-12", op=cls.op, then_symbol=left.name, else_symbol=right.name
|
|
172
|
+
)
|
|
145
173
|
if isinstance(left, Scalar):
|
|
146
|
-
left.data_type = right.data_type = binary_implicit_promotion(
|
|
147
|
-
|
|
148
|
-
|
|
174
|
+
left.data_type = right.data_type = binary_implicit_promotion(
|
|
175
|
+
left.data_type, right.data_type
|
|
176
|
+
)
|
|
177
|
+
return Dataset(name="result", components=copy(condition.components), data=None)
|
|
149
178
|
if left.get_identifiers() != condition.get_identifiers():
|
|
150
179
|
raise SemanticError("1-1-9-10", op=cls.op, clause=left.name)
|
|
151
180
|
if isinstance(right, Scalar):
|
|
152
181
|
for component in left.get_measures():
|
|
153
182
|
if component.data_type != right.data_type:
|
|
154
|
-
component.data_type = binary_implicit_promotion(
|
|
155
|
-
|
|
183
|
+
component.data_type = binary_implicit_promotion(
|
|
184
|
+
component.data_type, right.data_type
|
|
185
|
+
)
|
|
156
186
|
if isinstance(right, Dataset):
|
|
157
187
|
if left.get_identifiers() != condition.get_identifiers():
|
|
158
188
|
raise SemanticError("1-1-9-10", op=cls.op, clause=right.name)
|
|
@@ -160,80 +190,242 @@ class If(Operator):
|
|
|
160
190
|
raise SemanticError("1-1-9-13", op=cls.op, then=left.name, else_clause=right.name)
|
|
161
191
|
for component in left.get_measures():
|
|
162
192
|
if component.data_type != right.components[component.name].data_type:
|
|
163
|
-
component.data_type = right.components[component.name].data_type =
|
|
164
|
-
binary_implicit_promotion(
|
|
165
|
-
|
|
193
|
+
component.data_type = right.components[component.name].data_type = (
|
|
194
|
+
binary_implicit_promotion(
|
|
195
|
+
component.data_type, right.components[component.name].data_type
|
|
196
|
+
)
|
|
197
|
+
)
|
|
166
198
|
if isinstance(condition, Dataset):
|
|
167
199
|
if len(condition.get_measures()) != 1:
|
|
168
200
|
raise SemanticError("1-1-9-4", op=cls.op, name=condition.name)
|
|
169
201
|
if condition.get_measures()[0].data_type != Boolean:
|
|
170
|
-
raise SemanticError(
|
|
171
|
-
|
|
202
|
+
raise SemanticError(
|
|
203
|
+
"1-1-9-5",
|
|
204
|
+
op=cls.op,
|
|
205
|
+
type=SCALAR_TYPES_CLASS_REVERSE[condition.get_measures()[0].data_type],
|
|
206
|
+
)
|
|
172
207
|
if left.get_identifiers() != condition.get_identifiers():
|
|
173
208
|
raise SemanticError("1-1-9-6", op=cls.op)
|
|
174
209
|
result_components = {comp_name: copy(comp) for comp_name, comp in left.components.items()}
|
|
175
|
-
return Dataset(name=
|
|
210
|
+
return Dataset(name="result", components=result_components, data=None)
|
|
176
211
|
|
|
177
212
|
|
|
178
213
|
class Nvl(Binary):
|
|
179
214
|
"""
|
|
180
215
|
Null class:
|
|
181
|
-
`Nvl <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=229&zoom=100,72,370>`_operator class.
|
|
216
|
+
`Nvl <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=229&zoom=100,72,370>`_operator class. # noqa E501
|
|
182
217
|
It has the following class methods:
|
|
183
218
|
|
|
184
219
|
Class methods:
|
|
185
|
-
Validate: Class method that validates if the operation at scalar,
|
|
220
|
+
Validate: Class method that validates if the operation at scalar,
|
|
221
|
+
datacomponent or dataset level can be performed.
|
|
186
222
|
Evaluate: Evaluates the actual operation, returning the result.
|
|
187
223
|
"""
|
|
188
224
|
|
|
189
225
|
@classmethod
|
|
190
|
-
def evaluate(cls, left, right):
|
|
226
|
+
def evaluate(cls, left: Any, right: Any) -> Union[Scalar, DataComponent, Dataset]:
|
|
191
227
|
result = cls.validate(left, right)
|
|
192
228
|
|
|
193
|
-
if isinstance(left, Scalar):
|
|
194
|
-
if pd.isnull(left):
|
|
229
|
+
if isinstance(left, Scalar) and isinstance(result, Scalar):
|
|
230
|
+
if pd.isnull(left): # type: ignore[call-overload]
|
|
195
231
|
result.value = right.value
|
|
196
232
|
else:
|
|
197
233
|
result.value = left.value
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
234
|
+
else:
|
|
235
|
+
if not isinstance(result, Scalar):
|
|
236
|
+
if isinstance(right, Scalar):
|
|
237
|
+
result.data = left.data.fillna(right.value)
|
|
238
|
+
else:
|
|
239
|
+
result.data = left.data.fillna(right.data)
|
|
240
|
+
if isinstance(result, Dataset):
|
|
241
|
+
result.data = result.data[result.get_components_names()]
|
|
205
242
|
return result
|
|
206
243
|
|
|
207
244
|
@classmethod
|
|
208
|
-
def validate(cls, left, right) -> Scalar
|
|
245
|
+
def validate(cls, left: Any, right: Any) -> Union[Scalar, DataComponent, Dataset]:
|
|
246
|
+
result_components = {}
|
|
209
247
|
if isinstance(left, Scalar):
|
|
210
248
|
if not isinstance(right, Scalar):
|
|
211
249
|
raise ValueError(
|
|
212
|
-
"Nvl operation at scalar level must have scalar
|
|
250
|
+
"Nvl operation at scalar level must have scalar "
|
|
251
|
+
"types on right (applicable) side"
|
|
252
|
+
)
|
|
213
253
|
cls.type_validation(left.data_type, right.data_type)
|
|
214
|
-
return Scalar(name=
|
|
254
|
+
return Scalar(name="result", value=None, data_type=left.data_type)
|
|
215
255
|
if isinstance(left, DataComponent):
|
|
216
256
|
if isinstance(right, Dataset):
|
|
217
257
|
raise ValueError(
|
|
218
|
-
"Nvl operation at component level cannot have
|
|
258
|
+
"Nvl operation at component level cannot have "
|
|
259
|
+
"dataset type on right (applicable) side"
|
|
260
|
+
)
|
|
219
261
|
cls.type_validation(left.data_type, right.data_type)
|
|
220
|
-
return DataComponent(
|
|
221
|
-
|
|
222
|
-
|
|
262
|
+
return DataComponent(
|
|
263
|
+
name="result",
|
|
264
|
+
data=pd.Series(dtype=object),
|
|
265
|
+
data_type=left.data_type,
|
|
266
|
+
role=Role.MEASURE,
|
|
267
|
+
nullable=False,
|
|
268
|
+
)
|
|
223
269
|
if isinstance(left, Dataset):
|
|
224
270
|
if isinstance(right, DataComponent):
|
|
225
271
|
raise ValueError(
|
|
226
|
-
"Nvl operation at dataset level cannot have component
|
|
272
|
+
"Nvl operation at dataset level cannot have component "
|
|
273
|
+
"type on right (applicable) side"
|
|
274
|
+
)
|
|
227
275
|
if isinstance(right, Scalar):
|
|
228
276
|
for component in left.get_measures():
|
|
229
277
|
cls.type_validation(component.data_type, right.data_type)
|
|
230
278
|
if isinstance(right, Dataset):
|
|
231
279
|
for component in left.get_measures():
|
|
232
|
-
cls.type_validation(
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
280
|
+
cls.type_validation(
|
|
281
|
+
component.data_type, right.components[component.name].data_type
|
|
282
|
+
)
|
|
283
|
+
result_components = {
|
|
284
|
+
comp_name: copy(comp)
|
|
285
|
+
for comp_name, comp in left.components.items()
|
|
286
|
+
if comp.role != Role.ATTRIBUTE
|
|
287
|
+
}
|
|
237
288
|
for comp in result_components.values():
|
|
238
289
|
comp.nullable = False
|
|
239
|
-
|
|
290
|
+
return Dataset(
|
|
291
|
+
name="result",
|
|
292
|
+
components=result_components,
|
|
293
|
+
data=None
|
|
294
|
+
)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
class Case(Operator):
|
|
298
|
+
|
|
299
|
+
@classmethod
|
|
300
|
+
def evaluate(cls,
|
|
301
|
+
conditions: List[Any],
|
|
302
|
+
thenOps: List[Any],
|
|
303
|
+
elseOp: Any
|
|
304
|
+
) -> Union[Scalar, DataComponent, Dataset]:
|
|
305
|
+
|
|
306
|
+
result = cls.validate(conditions, thenOps, elseOp)
|
|
307
|
+
|
|
308
|
+
if isinstance(result, Scalar):
|
|
309
|
+
result.value = elseOp.value
|
|
310
|
+
for i in range(len(conditions)):
|
|
311
|
+
if conditions[i].value:
|
|
312
|
+
result.value = thenOps[i].value
|
|
313
|
+
|
|
314
|
+
if isinstance(result, DataComponent):
|
|
315
|
+
result.data = pd.Series(None, index=conditions[0].data.index)
|
|
316
|
+
|
|
317
|
+
for i, condition in enumerate(conditions):
|
|
318
|
+
value = thenOps[i].value if isinstance(thenOps[i], Scalar) else thenOps[i].data
|
|
319
|
+
result.data = np.where(condition.data, value, # type: ignore[call-overload]
|
|
320
|
+
result.data)
|
|
321
|
+
|
|
322
|
+
condition_mask_else = ~np.any([condition.data for condition in conditions], axis=0)
|
|
323
|
+
else_value = elseOp.value if isinstance(elseOp, Scalar) else elseOp.data
|
|
324
|
+
result.data = pd.Series(np.where(condition_mask_else, else_value, result.data),
|
|
325
|
+
index=conditions[0].data.index)
|
|
326
|
+
|
|
327
|
+
if isinstance(result, Dataset):
|
|
328
|
+
identifiers = result.get_identifiers_names()
|
|
329
|
+
columns = [col for col in result.get_components_names() if col not in identifiers]
|
|
330
|
+
result.data = (conditions[0].data[identifiers] if conditions[0].data is not None
|
|
331
|
+
else pd.DataFrame(columns=identifiers))
|
|
332
|
+
|
|
333
|
+
for i in range(len(conditions)):
|
|
334
|
+
condition = conditions[i]
|
|
335
|
+
bool_col = next(x.name for x in condition.get_measures() if x.data_type == Boolean)
|
|
336
|
+
condition_mask = condition.data[bool_col]
|
|
337
|
+
|
|
338
|
+
result.data.loc[condition_mask, columns] = (
|
|
339
|
+
thenOps[i].value if isinstance(thenOps[i], Scalar)
|
|
340
|
+
else thenOps[i].data.loc[condition_mask, columns]
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
condition_mask_else = ~np.logical_or.reduce([
|
|
344
|
+
condition.data[next(x.name for x in condition.get_measures() if
|
|
345
|
+
x.data_type == Boolean)].astype(bool) for
|
|
346
|
+
condition in conditions])
|
|
347
|
+
|
|
348
|
+
result.data.loc[condition_mask_else, columns] = (
|
|
349
|
+
elseOp.value if isinstance(elseOp, Scalar)
|
|
350
|
+
else elseOp.data.loc[condition_mask_else, columns]
|
|
351
|
+
)
|
|
352
|
+
|
|
353
|
+
return result
|
|
354
|
+
|
|
355
|
+
@classmethod
|
|
356
|
+
def validate(cls,
|
|
357
|
+
conditions: List[Any],
|
|
358
|
+
thenOps: List[Any],
|
|
359
|
+
elseOp: Any
|
|
360
|
+
) -> Union[Scalar, DataComponent, Dataset]:
|
|
361
|
+
|
|
362
|
+
if len(set(map(type, conditions))) > 1:
|
|
363
|
+
raise SemanticError("2-1-9-1", op=cls.op)
|
|
364
|
+
|
|
365
|
+
ops = thenOps + [elseOp]
|
|
366
|
+
then_else_types = set(map(type, ops))
|
|
367
|
+
condition_type = type(conditions[0])
|
|
368
|
+
|
|
369
|
+
if condition_type is Scalar:
|
|
370
|
+
for condition in conditions:
|
|
371
|
+
if condition.data_type != Boolean:
|
|
372
|
+
raise SemanticError("2-1-9-2", op=cls.op, name=condition.name)
|
|
373
|
+
if list(then_else_types) != [Scalar]:
|
|
374
|
+
raise SemanticError("2-1-9-3", op=cls.op)
|
|
375
|
+
|
|
376
|
+
# The output data type is the data type of the last then operation that has a true
|
|
377
|
+
# condition, defaulting to the data type of the else operation if no condition is true
|
|
378
|
+
output_data_type = elseOp.data_type
|
|
379
|
+
for i in range(len(conditions)):
|
|
380
|
+
if conditions[i].value:
|
|
381
|
+
output_data_type = thenOps[i].data_type
|
|
382
|
+
|
|
383
|
+
return Scalar(
|
|
384
|
+
name="result",
|
|
385
|
+
value=None,
|
|
386
|
+
data_type=output_data_type,
|
|
387
|
+
)
|
|
388
|
+
|
|
389
|
+
elif condition_type is DataComponent:
|
|
390
|
+
for condition in conditions:
|
|
391
|
+
if not condition.data_type == Boolean:
|
|
392
|
+
raise SemanticError("2-1-9-4", op=cls.op, name=condition.name)
|
|
393
|
+
|
|
394
|
+
nullable = any(
|
|
395
|
+
thenOp.nullable if isinstance(thenOp, DataComponent) else thenOp.data_type == Null
|
|
396
|
+
for thenOp in ops
|
|
397
|
+
)
|
|
398
|
+
|
|
399
|
+
data_type = ops[0].data_type
|
|
400
|
+
for op in ops[1:]:
|
|
401
|
+
data_type = binary_implicit_promotion(data_type, op.data_type)
|
|
402
|
+
|
|
403
|
+
return DataComponent(
|
|
404
|
+
name="result",
|
|
405
|
+
data=None,
|
|
406
|
+
data_type=data_type,
|
|
407
|
+
role=Role.MEASURE,
|
|
408
|
+
nullable=nullable,
|
|
409
|
+
)
|
|
410
|
+
|
|
411
|
+
# Dataset
|
|
412
|
+
for condition in conditions:
|
|
413
|
+
if len(condition.get_measures_names()) != 1:
|
|
414
|
+
raise SemanticError("1-1-1-4", op=cls.op)
|
|
415
|
+
if condition.get_measures()[0].data_type != Boolean:
|
|
416
|
+
raise SemanticError("2-1-9-5", op=cls.op, name=condition.name)
|
|
417
|
+
|
|
418
|
+
if Dataset not in then_else_types:
|
|
419
|
+
raise SemanticError("2-1-9-6", op=cls.op)
|
|
420
|
+
|
|
421
|
+
components = next(op for op in ops if isinstance(op, Dataset)).components
|
|
422
|
+
comp_names = [comp.name for comp in components.values()]
|
|
423
|
+
for op in ops:
|
|
424
|
+
if isinstance(op, Dataset) and op.get_components_names() != comp_names:
|
|
425
|
+
raise SemanticError("2-1-9-7", op=cls.op)
|
|
426
|
+
|
|
427
|
+
return Dataset(
|
|
428
|
+
name="result",
|
|
429
|
+
components=components,
|
|
430
|
+
data=None
|
|
431
|
+
)
|