vtlengine 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +19 -8
- vtlengine/API/__init__.py +9 -9
- vtlengine/AST/ASTConstructor.py +23 -43
- vtlengine/AST/ASTConstructorModules/Expr.py +147 -71
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +104 -40
- vtlengine/AST/ASTConstructorModules/Terminals.py +28 -39
- vtlengine/AST/ASTTemplate.py +16 -1
- vtlengine/AST/DAG/__init__.py +12 -15
- vtlengine/AST/Grammar/Vtl.g4 +49 -20
- vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
- vtlengine/AST/Grammar/lexer.py +1293 -1183
- vtlengine/AST/Grammar/parser.py +5758 -3939
- vtlengine/AST/Grammar/tokens.py +12 -0
- vtlengine/AST/VtlVisitor.py +9 -2
- vtlengine/AST/__init__.py +21 -3
- vtlengine/DataTypes/TimeHandling.py +12 -7
- vtlengine/DataTypes/__init__.py +17 -24
- vtlengine/Exceptions/__init__.py +43 -1
- vtlengine/Exceptions/messages.py +82 -62
- vtlengine/Interpreter/__init__.py +125 -120
- vtlengine/Model/__init__.py +17 -12
- vtlengine/Operators/Aggregation.py +14 -14
- vtlengine/Operators/Analytic.py +56 -31
- vtlengine/Operators/Assignment.py +2 -3
- vtlengine/Operators/Boolean.py +5 -7
- vtlengine/Operators/CastOperator.py +12 -13
- vtlengine/Operators/Clause.py +11 -13
- vtlengine/Operators/Comparison.py +31 -17
- vtlengine/Operators/Conditional.py +157 -17
- vtlengine/Operators/General.py +4 -4
- vtlengine/Operators/HROperators.py +41 -34
- vtlengine/Operators/Join.py +18 -22
- vtlengine/Operators/Numeric.py +76 -39
- vtlengine/Operators/RoleSetter.py +6 -8
- vtlengine/Operators/Set.py +7 -12
- vtlengine/Operators/String.py +19 -27
- vtlengine/Operators/Time.py +366 -43
- vtlengine/Operators/Validation.py +4 -7
- vtlengine/Operators/__init__.py +38 -41
- vtlengine/Utils/__init__.py +149 -94
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +2 -2
- vtlengine/files/output/_time_period_representation.py +0 -1
- vtlengine/files/parser/__init__.py +18 -18
- vtlengine/files/parser/_time_checking.py +3 -2
- {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/METADATA +17 -5
- vtlengine-1.0.3.dist-info/RECORD +58 -0
- vtlengine-1.0.1.dist-info/RECORD +0 -58
- {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/LICENSE.md +0 -0
- {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/WHEEL +0 -0
vtlengine/Operators/Analytic.py
CHANGED
|
@@ -3,8 +3,6 @@ from typing import List, Optional
|
|
|
3
3
|
|
|
4
4
|
import duckdb
|
|
5
5
|
|
|
6
|
-
from vtlengine.Exceptions import SemanticError
|
|
7
|
-
|
|
8
6
|
# if os.environ.get("SPARK"):
|
|
9
7
|
# import pyspark.pandas as pd
|
|
10
8
|
# else:
|
|
@@ -32,6 +30,7 @@ from vtlengine.AST.Grammar.tokens import (
|
|
|
32
30
|
VAR_SAMP,
|
|
33
31
|
)
|
|
34
32
|
from vtlengine.DataTypes import COMP_NAME_MAPPING, Integer, Number, unary_implicit_promotion
|
|
33
|
+
from vtlengine.Exceptions import SemanticError
|
|
35
34
|
from vtlengine.Model import Component, Dataset, Role
|
|
36
35
|
|
|
37
36
|
|
|
@@ -58,11 +57,9 @@ class Analytic(Operator.Unary):
|
|
|
58
57
|
ordering: Optional[List[OrderBy]],
|
|
59
58
|
window: Optional[Windowing],
|
|
60
59
|
params: Optional[List[int]],
|
|
60
|
+
component_name: Optional[str] = None,
|
|
61
61
|
) -> Dataset:
|
|
62
|
-
if ordering is None
|
|
63
|
-
order_components = []
|
|
64
|
-
else:
|
|
65
|
-
order_components = [o.component for o in ordering]
|
|
62
|
+
order_components = [] if ordering is None else [o.component for o in ordering]
|
|
66
63
|
identifier_names = operand.get_identifiers_names()
|
|
67
64
|
result_components = operand.components.copy()
|
|
68
65
|
|
|
@@ -83,25 +80,51 @@ class Analytic(Operator.Unary):
|
|
|
83
80
|
raise SemanticError(
|
|
84
81
|
"1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=operand.name
|
|
85
82
|
)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
83
|
+
if component_name is not None:
|
|
84
|
+
if cls.type_to_check is not None:
|
|
85
|
+
unary_implicit_promotion(
|
|
86
|
+
operand.components[component_name].data_type, cls.type_to_check
|
|
87
|
+
)
|
|
88
|
+
if cls.return_type is not None:
|
|
89
|
+
result_components[component_name] = Component(
|
|
90
|
+
name=component_name,
|
|
91
|
+
data_type=cls.return_type,
|
|
92
|
+
role=operand.components[component_name].role,
|
|
93
|
+
nullable=operand.components[component_name].nullable,
|
|
94
|
+
)
|
|
95
|
+
if cls.op == COUNT:
|
|
96
|
+
measure_name = COMP_NAME_MAPPING[cls.return_type]
|
|
97
|
+
result_components[measure_name] = Component(
|
|
98
|
+
name=measure_name,
|
|
99
|
+
data_type=cls.return_type,
|
|
100
|
+
role=Role.MEASURE,
|
|
101
|
+
nullable=operand.components[component_name].nullable,
|
|
102
|
+
)
|
|
103
|
+
if component_name in result_components:
|
|
104
|
+
del result_components[component_name]
|
|
105
|
+
else:
|
|
106
|
+
measures = operand.get_measures()
|
|
107
|
+
if len(measures) == 0:
|
|
108
|
+
raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
|
|
109
|
+
if cls.type_to_check is not None:
|
|
110
|
+
for measure in measures:
|
|
111
|
+
unary_implicit_promotion(measure.data_type, cls.type_to_check)
|
|
112
|
+
if cls.return_type is not None:
|
|
113
|
+
for measure in measures:
|
|
114
|
+
new_measure = copy(measure)
|
|
115
|
+
new_measure.data_type = cls.return_type
|
|
116
|
+
result_components[measure.name] = new_measure
|
|
117
|
+
if cls.op == COUNT and len(measures) <= 1:
|
|
118
|
+
measure_name = COMP_NAME_MAPPING[cls.return_type]
|
|
119
|
+
nullable = False if len(measures) == 0 else measures[0].nullable
|
|
120
|
+
if len(measures) == 1:
|
|
121
|
+
del result_components[measures[0].name]
|
|
122
|
+
result_components[measure_name] = Component(
|
|
123
|
+
name=measure_name,
|
|
124
|
+
data_type=cls.return_type,
|
|
125
|
+
role=Role.MEASURE,
|
|
126
|
+
nullable=nullable,
|
|
127
|
+
)
|
|
105
128
|
|
|
106
129
|
return Dataset(name="result", components=result_components, data=None)
|
|
107
130
|
|
|
@@ -151,10 +174,7 @@ class Analytic(Operator.Unary):
|
|
|
151
174
|
window_str = f"{mode} BETWEEN {window.start} {start_mode} AND {window.stop} {stop_mode}"
|
|
152
175
|
|
|
153
176
|
# Partitioning
|
|
154
|
-
if len(partitioning) > 0
|
|
155
|
-
partition = "PARTITION BY " + ", ".join(partitioning)
|
|
156
|
-
else:
|
|
157
|
-
partition = ""
|
|
177
|
+
partition = "PARTITION BY " + ", ".join(partitioning) if len(partitioning) > 0 else ""
|
|
158
178
|
|
|
159
179
|
# Ordering
|
|
160
180
|
order_str = ""
|
|
@@ -205,12 +225,17 @@ class Analytic(Operator.Unary):
|
|
|
205
225
|
ordering: Optional[List[OrderBy]],
|
|
206
226
|
window: Optional[Windowing],
|
|
207
227
|
params: Optional[List[int]],
|
|
228
|
+
component_name: Optional[str] = None,
|
|
208
229
|
) -> Dataset:
|
|
209
|
-
result = cls.validate(operand, partitioning, ordering, window, params)
|
|
230
|
+
result = cls.validate(operand, partitioning, ordering, window, params, component_name)
|
|
210
231
|
df = operand.data.copy() if operand.data is not None else pd.DataFrame()
|
|
211
|
-
measure_names = operand.get_measures_names()
|
|
212
232
|
identifier_names = operand.get_identifiers_names()
|
|
213
233
|
|
|
234
|
+
if component_name is not None:
|
|
235
|
+
measure_names = [component_name]
|
|
236
|
+
else:
|
|
237
|
+
measure_names = operand.get_measures_names()
|
|
238
|
+
|
|
214
239
|
result.data = cls.analyticfunc(
|
|
215
240
|
df=df,
|
|
216
241
|
partitioning=partitioning,
|
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
from typing import
|
|
2
|
-
|
|
3
|
-
from vtlengine.Operators import Binary
|
|
1
|
+
from typing import Any, Union
|
|
4
2
|
|
|
5
3
|
from vtlengine.Exceptions import SemanticError
|
|
6
4
|
from vtlengine.Model import DataComponent, Dataset
|
|
5
|
+
from vtlengine.Operators import Binary
|
|
7
6
|
|
|
8
7
|
ALL_MODEL_TYPES = Union[DataComponent, Dataset]
|
|
9
8
|
|
vtlengine/Operators/Boolean.py
CHANGED
|
@@ -2,13 +2,13 @@
|
|
|
2
2
|
# import pyspark.pandas as pd
|
|
3
3
|
# else:
|
|
4
4
|
# import pandas as pd
|
|
5
|
-
import
|
|
5
|
+
from typing import Any, Optional
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
import pandas as pd
|
|
8
8
|
|
|
9
|
-
from vtlengine.AST.Grammar.tokens import AND, OR, XOR, NOT
|
|
10
|
-
from vtlengine.DataTypes import Boolean
|
|
11
9
|
import vtlengine.Operators as Operator
|
|
10
|
+
from vtlengine.AST.Grammar.tokens import AND, NOT, OR, XOR
|
|
11
|
+
from vtlengine.DataTypes import Boolean
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class Unary(Operator.Unary):
|
|
@@ -30,9 +30,7 @@ class Binary(Operator.Binary):
|
|
|
30
30
|
|
|
31
31
|
@classmethod
|
|
32
32
|
def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
|
|
33
|
-
result = cls.comp_op(
|
|
34
|
-
left_series.astype("bool[pyarrow]"), right_series.astype("bool[pyarrow]")
|
|
35
|
-
)
|
|
33
|
+
result = cls.comp_op(left_series.astype("boolean"), right_series.astype("boolean"))
|
|
36
34
|
return result.replace({pd.NA: None}).astype(object)
|
|
37
35
|
|
|
38
36
|
@classmethod
|
|
@@ -1,27 +1,27 @@
|
|
|
1
1
|
from copy import copy
|
|
2
|
-
from typing import Optional,
|
|
2
|
+
from typing import Any, Optional, Type, Union
|
|
3
3
|
|
|
4
|
-
import vtlengine.Operators as Operator
|
|
5
4
|
import pandas as pd
|
|
5
|
+
|
|
6
|
+
import vtlengine.Operators as Operator
|
|
7
|
+
from vtlengine.AST.Grammar.tokens import CAST
|
|
6
8
|
from vtlengine.DataTypes import (
|
|
7
9
|
COMP_NAME_MAPPING,
|
|
8
10
|
EXPLICIT_WITH_MASK_TYPE_PROMOTION_MAPPING,
|
|
9
11
|
EXPLICIT_WITHOUT_MASK_TYPE_PROMOTION_MAPPING,
|
|
10
12
|
IMPLICIT_TYPE_PROMOTION_MAPPING,
|
|
11
|
-
|
|
12
|
-
Number,
|
|
13
|
-
TimeInterval,
|
|
13
|
+
SCALAR_TYPES_CLASS_REVERSE,
|
|
14
14
|
Date,
|
|
15
|
-
TimePeriod,
|
|
16
15
|
Duration,
|
|
17
|
-
|
|
16
|
+
Number,
|
|
18
17
|
ScalarType,
|
|
18
|
+
String,
|
|
19
|
+
TimeInterval,
|
|
20
|
+
TimePeriod,
|
|
19
21
|
)
|
|
20
22
|
from vtlengine.DataTypes.TimeHandling import str_period_to_date
|
|
21
|
-
|
|
22
|
-
from vtlengine.AST.Grammar.tokens import CAST
|
|
23
23
|
from vtlengine.Exceptions import SemanticError
|
|
24
|
-
from vtlengine.Model import Component, Dataset, Role, Scalar
|
|
24
|
+
from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
|
|
25
25
|
|
|
26
26
|
duration_mapping = {"A": 6, "S": 5, "Q": 4, "M": 3, "W": 2, "D": 1}
|
|
27
27
|
|
|
@@ -286,9 +286,8 @@ class Cast(Operator.Unary):
|
|
|
286
286
|
mask: Optional[str] = None,
|
|
287
287
|
) -> Any:
|
|
288
288
|
|
|
289
|
-
if mask is not None:
|
|
290
|
-
|
|
291
|
-
raise Exception(f"{cls.op} mask must be a string")
|
|
289
|
+
if mask is not None and not isinstance(mask, str):
|
|
290
|
+
raise Exception(f"{cls.op} mask must be a string")
|
|
292
291
|
|
|
293
292
|
if isinstance(operand, Dataset):
|
|
294
293
|
return cls.dataset_validation(operand, scalarType, mask)
|
vtlengine/Operators/Clause.py
CHANGED
|
@@ -1,21 +1,20 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
|
|
3
1
|
from copy import copy
|
|
4
|
-
from typing import List,
|
|
2
|
+
from typing import List, Type, Union
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
5
|
|
|
6
|
+
from vtlengine.AST import RenameNode
|
|
7
|
+
from vtlengine.AST.Grammar.tokens import AGGREGATE, CALC, DROP, KEEP, RENAME, SUBSPACE
|
|
6
8
|
from vtlengine.DataTypes import (
|
|
7
9
|
Boolean,
|
|
10
|
+
ScalarType,
|
|
8
11
|
String,
|
|
9
12
|
check_unary_implicit_promotion,
|
|
10
13
|
unary_implicit_promotion,
|
|
11
|
-
ScalarType,
|
|
12
14
|
)
|
|
13
|
-
from vtlengine.Operators import Operator
|
|
14
|
-
|
|
15
|
-
from vtlengine.AST import RenameNode
|
|
16
|
-
from vtlengine.AST.Grammar.tokens import KEEP, DROP, RENAME, SUBSPACE, CALC, AGGREGATE
|
|
17
15
|
from vtlengine.Exceptions import SemanticError
|
|
18
16
|
from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
|
|
17
|
+
from vtlengine.Operators import Operator
|
|
19
18
|
|
|
20
19
|
|
|
21
20
|
class Calc(Operator):
|
|
@@ -162,9 +161,8 @@ class Keep(Operator):
|
|
|
162
161
|
def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
|
|
163
162
|
if len(operands) == 0:
|
|
164
163
|
raise ValueError("Keep clause requires at least one operand")
|
|
165
|
-
if dataset is None:
|
|
166
|
-
|
|
167
|
-
raise ValueError("Keep clause requires at most one dataset operand")
|
|
164
|
+
if dataset is None and sum(isinstance(operand, Dataset) for operand in operands) != 1:
|
|
165
|
+
raise ValueError("Keep clause requires at most one dataset operand")
|
|
168
166
|
result_dataset = cls.validate(operands, dataset)
|
|
169
167
|
if dataset.data is not None:
|
|
170
168
|
result_dataset.data = dataset.data[dataset.get_identifiers_names() + operands]
|
|
@@ -212,11 +210,11 @@ class Rename(Operator):
|
|
|
212
210
|
raise SemanticError("1-3-1", alias=duplicates)
|
|
213
211
|
|
|
214
212
|
for operand in operands:
|
|
215
|
-
if operand.old_name not in dataset.components
|
|
213
|
+
if operand.old_name not in dataset.components:
|
|
216
214
|
raise SemanticError(
|
|
217
215
|
"1-1-1-10", op=cls.op, comp_name=operand.old_name, dataset_name=dataset.name
|
|
218
216
|
)
|
|
219
|
-
if operand.new_name in dataset.components
|
|
217
|
+
if operand.new_name in dataset.components:
|
|
220
218
|
raise SemanticError(
|
|
221
219
|
"1-1-6-8", op=cls.op, comp_name=operand.new_name, dataset_name=dataset.name
|
|
222
220
|
)
|
|
@@ -3,15 +3,13 @@ import re
|
|
|
3
3
|
from copy import copy
|
|
4
4
|
from typing import Any, Optional, Union
|
|
5
5
|
|
|
6
|
-
from vtlengine.Exceptions import SemanticError
|
|
7
|
-
from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
|
|
8
|
-
|
|
9
6
|
# if os.environ.get("SPARK"):
|
|
10
7
|
# import pyspark.pandas as pd
|
|
11
8
|
# else:
|
|
12
9
|
# import pandas as pd
|
|
13
10
|
import pandas as pd
|
|
14
11
|
|
|
12
|
+
import vtlengine.Operators as Operator
|
|
15
13
|
from vtlengine.AST.Grammar.tokens import (
|
|
16
14
|
CHARSET_MATCH,
|
|
17
15
|
EQ,
|
|
@@ -24,8 +22,9 @@ from vtlengine.AST.Grammar.tokens import (
|
|
|
24
22
|
NEQ,
|
|
25
23
|
NOT_IN,
|
|
26
24
|
)
|
|
27
|
-
from vtlengine.DataTypes import
|
|
28
|
-
|
|
25
|
+
from vtlengine.DataTypes import COMP_NAME_MAPPING, Boolean, Null, Number, String
|
|
26
|
+
from vtlengine.Exceptions import SemanticError
|
|
27
|
+
from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
|
|
29
28
|
|
|
30
29
|
|
|
31
30
|
class Unary(Operator.Unary):
|
|
@@ -75,11 +74,11 @@ class Binary(Operator.Binary):
|
|
|
75
74
|
return_type = Boolean
|
|
76
75
|
|
|
77
76
|
@classmethod
|
|
78
|
-
def _cast_values(
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
#
|
|
77
|
+
def _cast_values(cls,
|
|
78
|
+
x: Optional[Union[int, float, str, bool]],
|
|
79
|
+
y: Optional[Union[int, float, str, bool]]
|
|
80
|
+
) -> Any:
|
|
81
|
+
# Cast values to compatible types for comparison
|
|
83
82
|
try:
|
|
84
83
|
if isinstance(x, str) and isinstance(y, bool):
|
|
85
84
|
y = String.cast(y)
|
|
@@ -97,6 +96,7 @@ class Binary(Operator.Binary):
|
|
|
97
96
|
|
|
98
97
|
@classmethod
|
|
99
98
|
def op_func(cls, x: Any, y: Any) -> Any:
|
|
99
|
+
# Return None if any of the values are NaN
|
|
100
100
|
if pd.isnull(x) or pd.isnull(y):
|
|
101
101
|
return None
|
|
102
102
|
x, y = cls._cast_values(x, y)
|
|
@@ -104,12 +104,29 @@ class Binary(Operator.Binary):
|
|
|
104
104
|
|
|
105
105
|
@classmethod
|
|
106
106
|
def apply_operation_series_scalar(cls, series: Any, scalar: Any, series_left: bool) -> Any:
|
|
107
|
-
if scalar
|
|
107
|
+
if pd.isnull(scalar):
|
|
108
108
|
return pd.Series(None, index=series.index)
|
|
109
|
+
|
|
110
|
+
first_non_null = series.dropna().iloc[0] if not series.dropna().empty else None
|
|
111
|
+
if first_non_null is not None:
|
|
112
|
+
scalar, first_non_null = cls._cast_values(scalar, first_non_null)
|
|
113
|
+
|
|
114
|
+
series_type = pd.api.types.infer_dtype(series, skipna=True)
|
|
115
|
+
first_non_null_type = pd.api.types.infer_dtype([first_non_null])
|
|
116
|
+
|
|
117
|
+
if series_type != first_non_null_type:
|
|
118
|
+
if isinstance(first_non_null, str):
|
|
119
|
+
series = series.astype(str)
|
|
120
|
+
elif isinstance(first_non_null, (int, float)):
|
|
121
|
+
series = series.astype(float)
|
|
122
|
+
|
|
123
|
+
op = cls.py_op if cls.py_op is not None else cls.op_func
|
|
109
124
|
if series_left:
|
|
110
|
-
|
|
125
|
+
result = series.map(lambda x: op(x, scalar), na_action="ignore")
|
|
111
126
|
else:
|
|
112
|
-
|
|
127
|
+
result = series.map(lambda x: op(scalar, x), na_action="ignore")
|
|
128
|
+
|
|
129
|
+
return result
|
|
113
130
|
|
|
114
131
|
@classmethod
|
|
115
132
|
def apply_return_type_dataset(
|
|
@@ -408,10 +425,7 @@ class ExistIn(Operator.Operator):
|
|
|
408
425
|
reference_identifiers_names = left_id_names
|
|
409
426
|
|
|
410
427
|
# Checking if the left dataset is a subset of the right dataset
|
|
411
|
-
if is_subset_left
|
|
412
|
-
common_columns = left_id_names
|
|
413
|
-
else:
|
|
414
|
-
common_columns = right_id_names
|
|
428
|
+
common_columns = left_id_names if is_subset_left else right_id_names
|
|
415
429
|
|
|
416
430
|
# Check if the common identifiers are equal between the two datasets
|
|
417
431
|
if dataset_1.data is not None and dataset_2.data is not None:
|
|
@@ -1,31 +1,30 @@
|
|
|
1
1
|
from copy import copy
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Any, List, Union
|
|
3
3
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
|
|
6
|
+
# if os.environ.get("SPARK", False):
|
|
7
|
+
# import pyspark.pandas as pd
|
|
8
|
+
# else:
|
|
9
|
+
# import pandas as pd
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
6
12
|
from vtlengine.DataTypes import (
|
|
7
|
-
Boolean,
|
|
8
13
|
COMP_NAME_MAPPING,
|
|
9
|
-
binary_implicit_promotion,
|
|
10
14
|
SCALAR_TYPES_CLASS_REVERSE,
|
|
15
|
+
Boolean,
|
|
11
16
|
Null,
|
|
17
|
+
binary_implicit_promotion,
|
|
12
18
|
)
|
|
13
|
-
from vtlengine.Operators import Operator, Binary
|
|
14
|
-
|
|
15
19
|
from vtlengine.Exceptions import SemanticError
|
|
16
|
-
from vtlengine.Model import
|
|
17
|
-
|
|
18
|
-
# if os.environ.get("SPARK", False):
|
|
19
|
-
# import pyspark.pandas as pd
|
|
20
|
-
# else:
|
|
21
|
-
# import pandas as pd
|
|
22
|
-
import pandas as pd
|
|
20
|
+
from vtlengine.Model import DataComponent, Dataset, Role, Scalar
|
|
21
|
+
from vtlengine.Operators import Binary, Operator
|
|
23
22
|
|
|
24
23
|
|
|
25
24
|
class If(Operator):
|
|
26
25
|
"""
|
|
27
26
|
If class:
|
|
28
|
-
`If-then-else <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=225&zoom=100,72,142>`_ operator
|
|
27
|
+
`If-then-else <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=225&zoom=100,72,142>`_ operator
|
|
29
28
|
inherits from Operator, a superclass that contains general validate and evaluate class methods.
|
|
30
29
|
It has the following class methods:
|
|
31
30
|
Class methods:
|
|
@@ -40,7 +39,7 @@ class If(Operator):
|
|
|
40
39
|
validate: Class method that has two branches so datacomponent and datasets can be validated. With datacomponent,
|
|
41
40
|
the code reviews if it is actually a Measure and if it is a binary operation. Dataset branch reviews if the
|
|
42
41
|
identifiers are the same in 'if', 'then' and 'else'.
|
|
43
|
-
"""
|
|
42
|
+
""" # noqa E501
|
|
44
43
|
|
|
45
44
|
@classmethod
|
|
46
45
|
def evaluate(cls, condition: Any, true_branch: Any, false_branch: Any) -> Any:
|
|
@@ -108,7 +107,7 @@ class If(Operator):
|
|
|
108
107
|
)
|
|
109
108
|
if isinstance(result, Dataset):
|
|
110
109
|
drop_columns = [
|
|
111
|
-
column for column in result.data.columns if column not in result.components
|
|
110
|
+
column for column in result.data.columns if column not in result.components
|
|
112
111
|
]
|
|
113
112
|
result.data = result.data.dropna(subset=drop_columns).drop(columns=drop_columns)
|
|
114
113
|
if isinstance(true_branch, Scalar) and isinstance(false_branch, Scalar):
|
|
@@ -213,14 +212,14 @@ class If(Operator):
|
|
|
213
212
|
class Nvl(Binary):
|
|
214
213
|
"""
|
|
215
214
|
Null class:
|
|
216
|
-
`Nvl <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=229&zoom=100,72,370>`_operator class.
|
|
215
|
+
`Nvl <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=229&zoom=100,72,370>`_operator class.
|
|
217
216
|
It has the following class methods:
|
|
218
217
|
|
|
219
218
|
Class methods:
|
|
220
219
|
Validate: Class method that validates if the operation at scalar,
|
|
221
220
|
datacomponent or dataset level can be performed.
|
|
222
221
|
Evaluate: Evaluates the actual operation, returning the result.
|
|
223
|
-
"""
|
|
222
|
+
""" # noqa E501
|
|
224
223
|
|
|
225
224
|
@classmethod
|
|
226
225
|
def evaluate(cls, left: Any, right: Any) -> Union[Scalar, DataComponent, Dataset]:
|
|
@@ -288,3 +287,144 @@ class Nvl(Binary):
|
|
|
288
287
|
for comp in result_components.values():
|
|
289
288
|
comp.nullable = False
|
|
290
289
|
return Dataset(name="result", components=result_components, data=None)
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
class Case(Operator):
|
|
293
|
+
|
|
294
|
+
@classmethod
|
|
295
|
+
def evaluate(
|
|
296
|
+
cls, conditions: List[Any], thenOps: List[Any], elseOp: Any
|
|
297
|
+
) -> Union[Scalar, DataComponent, Dataset]:
|
|
298
|
+
|
|
299
|
+
result = cls.validate(conditions, thenOps, elseOp)
|
|
300
|
+
|
|
301
|
+
if isinstance(result, Scalar):
|
|
302
|
+
result.value = elseOp.value
|
|
303
|
+
for i in range(len(conditions)):
|
|
304
|
+
if conditions[i].value:
|
|
305
|
+
result.value = thenOps[i].value
|
|
306
|
+
|
|
307
|
+
if isinstance(result, DataComponent):
|
|
308
|
+
result.data = pd.Series(None, index=conditions[0].data.index)
|
|
309
|
+
|
|
310
|
+
for i, condition in enumerate(conditions):
|
|
311
|
+
value = thenOps[i].value if isinstance(thenOps[i], Scalar) else thenOps[i].data
|
|
312
|
+
result.data = np.where(
|
|
313
|
+
condition.data, value, result.data # type: ignore[call-overload]
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
condition_mask_else = ~np.any([condition.data for condition in conditions], axis=0)
|
|
317
|
+
else_value = elseOp.value if isinstance(elseOp, Scalar) else elseOp.data
|
|
318
|
+
result.data = pd.Series(
|
|
319
|
+
np.where(condition_mask_else, else_value, result.data),
|
|
320
|
+
index=conditions[0].data.index,
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
if isinstance(result, Dataset):
|
|
324
|
+
identifiers = result.get_identifiers_names()
|
|
325
|
+
columns = [col for col in result.get_components_names() if col not in identifiers]
|
|
326
|
+
result.data = (
|
|
327
|
+
conditions[0].data[identifiers]
|
|
328
|
+
if conditions[0].data is not None
|
|
329
|
+
else pd.DataFrame(columns=identifiers)
|
|
330
|
+
)
|
|
331
|
+
|
|
332
|
+
for i in range(len(conditions)):
|
|
333
|
+
condition = conditions[i]
|
|
334
|
+
bool_col = next(x.name for x in condition.get_measures() if x.data_type == Boolean)
|
|
335
|
+
condition_mask = condition.data[bool_col]
|
|
336
|
+
|
|
337
|
+
result.data.loc[condition_mask, columns] = (
|
|
338
|
+
thenOps[i].value
|
|
339
|
+
if isinstance(thenOps[i], Scalar)
|
|
340
|
+
else thenOps[i].data.loc[condition_mask, columns]
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
condition_mask_else = ~np.logical_or.reduce(
|
|
344
|
+
[
|
|
345
|
+
condition.data[
|
|
346
|
+
next(x.name for x in condition.get_measures() if x.data_type == Boolean)
|
|
347
|
+
].astype(bool)
|
|
348
|
+
for condition in conditions
|
|
349
|
+
]
|
|
350
|
+
)
|
|
351
|
+
|
|
352
|
+
result.data.loc[condition_mask_else, columns] = (
|
|
353
|
+
elseOp.value
|
|
354
|
+
if isinstance(elseOp, Scalar)
|
|
355
|
+
else elseOp.data.loc[condition_mask_else, columns]
|
|
356
|
+
)
|
|
357
|
+
|
|
358
|
+
return result
|
|
359
|
+
|
|
360
|
+
@classmethod
|
|
361
|
+
def validate(
|
|
362
|
+
cls, conditions: List[Any], thenOps: List[Any], elseOp: Any
|
|
363
|
+
) -> Union[Scalar, DataComponent, Dataset]:
|
|
364
|
+
|
|
365
|
+
if len(set(map(type, conditions))) > 1:
|
|
366
|
+
raise SemanticError("2-1-9-1", op=cls.op)
|
|
367
|
+
|
|
368
|
+
ops = thenOps + [elseOp]
|
|
369
|
+
then_else_types = set(map(type, ops))
|
|
370
|
+
condition_type = type(conditions[0])
|
|
371
|
+
|
|
372
|
+
if condition_type is Scalar:
|
|
373
|
+
for condition in conditions:
|
|
374
|
+
if condition.data_type != Boolean:
|
|
375
|
+
raise SemanticError("2-1-9-2", op=cls.op, name=condition.name)
|
|
376
|
+
if list(then_else_types) != [Scalar]:
|
|
377
|
+
raise SemanticError("2-1-9-3", op=cls.op)
|
|
378
|
+
|
|
379
|
+
# The output data type is the data type of the last then operation that has a true
|
|
380
|
+
# condition, defaulting to the data type of the else operation if no condition is true
|
|
381
|
+
output_data_type = elseOp.data_type
|
|
382
|
+
for i in range(len(conditions)):
|
|
383
|
+
if conditions[i].value:
|
|
384
|
+
output_data_type = thenOps[i].data_type
|
|
385
|
+
|
|
386
|
+
return Scalar(
|
|
387
|
+
name="result",
|
|
388
|
+
value=None,
|
|
389
|
+
data_type=output_data_type,
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
elif condition_type is DataComponent:
|
|
393
|
+
for condition in conditions:
|
|
394
|
+
if not condition.data_type == Boolean:
|
|
395
|
+
raise SemanticError("2-1-9-4", op=cls.op, name=condition.name)
|
|
396
|
+
|
|
397
|
+
nullable = any(
|
|
398
|
+
thenOp.nullable if isinstance(thenOp, DataComponent) else thenOp.data_type == Null
|
|
399
|
+
for thenOp in ops
|
|
400
|
+
)
|
|
401
|
+
|
|
402
|
+
data_type = ops[0].data_type
|
|
403
|
+
for op in ops[1:]:
|
|
404
|
+
data_type = binary_implicit_promotion(data_type, op.data_type)
|
|
405
|
+
|
|
406
|
+
return DataComponent(
|
|
407
|
+
name="result",
|
|
408
|
+
data=None,
|
|
409
|
+
data_type=data_type,
|
|
410
|
+
role=Role.MEASURE,
|
|
411
|
+
nullable=nullable,
|
|
412
|
+
)
|
|
413
|
+
|
|
414
|
+
# Dataset
|
|
415
|
+
for condition in conditions:
|
|
416
|
+
if len(condition.get_measures_names()) != 1:
|
|
417
|
+
raise SemanticError("1-1-1-4", op=cls.op)
|
|
418
|
+
if condition.get_measures()[0].data_type != Boolean:
|
|
419
|
+
raise SemanticError("2-1-9-5", op=cls.op, name=condition.name)
|
|
420
|
+
|
|
421
|
+
if Dataset not in then_else_types:
|
|
422
|
+
raise SemanticError("2-1-9-6", op=cls.op)
|
|
423
|
+
|
|
424
|
+
components = next(op for op in ops if isinstance(op, Dataset)).components
|
|
425
|
+
comp_names = [comp.name for comp in components.values()]
|
|
426
|
+
for op in ops:
|
|
427
|
+
if isinstance(op, Dataset) and op.get_components_names() != comp_names:
|
|
428
|
+
raise SemanticError("2-1-9-7", op=cls.op)
|
|
429
|
+
|
|
430
|
+
return Dataset(name="result", components=components, data=None)
|
vtlengine/Operators/General.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+
import sqlite3
|
|
2
|
+
from typing import Any, Dict, List, Union
|
|
2
3
|
|
|
3
4
|
import pandas as pd
|
|
4
|
-
import sqlite3
|
|
5
5
|
|
|
6
6
|
from vtlengine.DataTypes import COMP_NAME_MAPPING
|
|
7
7
|
from vtlengine.Exceptions import SemanticError
|
|
8
|
-
from vtlengine.Model import
|
|
8
|
+
from vtlengine.Model import Component, DataComponent, Dataset, ExternalRoutine, Role
|
|
9
9
|
from vtlengine.Operators import Binary, Unary
|
|
10
10
|
|
|
11
11
|
|
|
@@ -143,7 +143,7 @@ class Eval(Unary):
|
|
|
143
143
|
df = cls._execute_query(
|
|
144
144
|
external_routine.query, external_routine.dataset_names, empty_data_dict
|
|
145
145
|
)
|
|
146
|
-
component_names =
|
|
146
|
+
component_names = df.columns.tolist()
|
|
147
147
|
for comp_name in component_names:
|
|
148
148
|
if comp_name not in output.components:
|
|
149
149
|
raise SemanticError(
|