vtlengine 1.0.2__py3-none-any.whl → 1.0.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +12 -5
- vtlengine/API/__init__.py +8 -8
- vtlengine/AST/ASTConstructor.py +23 -43
- vtlengine/AST/ASTConstructorModules/Expr.py +69 -84
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +47 -57
- vtlengine/AST/ASTConstructorModules/Terminals.py +28 -39
- vtlengine/AST/ASTTemplate.py +0 -1
- vtlengine/AST/DAG/__init__.py +12 -15
- vtlengine/AST/Grammar/tokens.py +2 -2
- vtlengine/AST/VtlVisitor.py +0 -1
- vtlengine/AST/__init__.py +2 -3
- vtlengine/DataTypes/TimeHandling.py +10 -7
- vtlengine/DataTypes/__init__.py +17 -24
- vtlengine/Exceptions/__init__.py +3 -5
- vtlengine/Exceptions/messages.py +68 -56
- vtlengine/Interpreter/__init__.py +82 -103
- vtlengine/Model/__init__.py +10 -12
- vtlengine/Operators/Aggregation.py +14 -14
- vtlengine/Operators/Analytic.py +3 -10
- vtlengine/Operators/Assignment.py +2 -3
- vtlengine/Operators/Boolean.py +5 -7
- vtlengine/Operators/CastOperator.py +12 -13
- vtlengine/Operators/Clause.py +11 -13
- vtlengine/Operators/Comparison.py +31 -17
- vtlengine/Operators/Conditional.py +48 -49
- vtlengine/Operators/General.py +4 -4
- vtlengine/Operators/HROperators.py +41 -34
- vtlengine/Operators/Join.py +18 -22
- vtlengine/Operators/Numeric.py +44 -45
- vtlengine/Operators/RoleSetter.py +6 -8
- vtlengine/Operators/Set.py +7 -12
- vtlengine/Operators/String.py +19 -27
- vtlengine/Operators/Time.py +298 -109
- vtlengine/Operators/Validation.py +4 -7
- vtlengine/Operators/__init__.py +38 -41
- vtlengine/Utils/__init__.py +133 -114
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +2 -2
- vtlengine/files/output/_time_period_representation.py +0 -1
- vtlengine/files/parser/__init__.py +16 -18
- vtlengine/files/parser/_time_checking.py +1 -2
- {vtlengine-1.0.2.dist-info → vtlengine-1.0.3rc1.dist-info}/METADATA +1 -3
- vtlengine-1.0.3rc1.dist-info/RECORD +58 -0
- vtlengine-1.0.2.dist-info/RECORD +0 -58
- {vtlengine-1.0.2.dist-info → vtlengine-1.0.3rc1.dist-info}/LICENSE.md +0 -0
- {vtlengine-1.0.2.dist-info → vtlengine-1.0.3rc1.dist-info}/WHEEL +0 -0
vtlengine/Model/__init__.py
CHANGED
@@ -2,17 +2,18 @@ import json
 from collections import Counter
 from dataclasses import dataclass
 from enum import Enum
-from typing import Dict, List, Optional,
+from typing import Any, Dict, List, Optional, Type, Union

-import vtlengine.DataTypes as DataTypes
 import pandas as pd
 import sqlglot
 import sqlglot.expressions as exp
-from vtlengine.DataTypes import SCALAR_TYPES, ScalarType
-from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
 from pandas import DataFrame as PandasDataFrame
 from pandas._testing import assert_frame_equal

+import vtlengine.DataTypes as DataTypes
+from vtlengine.DataTypes import SCALAR_TYPES, ScalarType
+from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
+from vtlengine.Exceptions import SemanticError

 # from pyspark.pandas import DataFrame as SparkDataFrame, Series as SparkSeries

@@ -159,7 +160,7 @@ class Dataset:
             raise ValueError(
                 "The number of components must match the number of columns in the data"
             )
-        for name,
+        for name, _ in self.components.items():
             if name not in self.data.columns:
                 raise ValueError(f"Component {name} not found in the data")

@@ -209,8 +210,8 @@ class Dataset:
             return True
         elif self.data is None or other.data is None:
             return False
-        if len(self.data) == len(other.data) == 0:
-
+        if len(self.data) == len(other.data) == 0 and self.data.shape != other.data.shape:
+            raise SemanticError("0-1-1-14", dataset1=self.name, dataset2=other.name)

         self.data.fillna("", inplace=True)
         other.data.fillna("", inplace=True)
@@ -234,11 +235,8 @@
                     lambda x: str(TimePeriodHandler(x)) if x != "" else "", na_action="ignore"
                 )
             elif type_name in ["Integer", "Number"]:
-                if type_name == "Integer":
-                    type_ = "int64"
-                else:
-                    type_ = "float32"
-                # We use here a number to avoid errors on equality on empty strings
+                type_ = "int64" if type_name == "Integer" else "float32"
+                # We use here a number to avoid errors on equality on empty strings
                 self.data[comp.name] = (
                     self.data[comp.name]
                     .replace("", -1234997)
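The Integer/Number branch above collapses an if/else into a ternary and keeps the sentinel replacement for empty strings. A minimal, self-contained sketch of that casting step, assuming plain pandas (the function name and the example values are illustrative, not the engine's API):

    import pandas as pd

    def cast_numeric_column(column: pd.Series, type_name: str) -> pd.Series:
        # "int64" for Integer, "float32" for Number, as in the refactored branch
        type_ = "int64" if type_name == "Integer" else "float32"
        # Empty strings are swapped for a sentinel number so the cast (and later
        # equality checks) do not fail on empty cells
        return column.replace("", -1234997).astype(type_)

    print(cast_numeric_column(pd.Series([1, "", 3]), "Integer").tolist())  # [1, -1234997, 3]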
vtlengine/Operators/Aggregation.py
CHANGED
@@ -1,19 +1,8 @@
 from copy import copy
-from typing import List, Optional
+from typing import Any, List, Optional

 import duckdb
 import pandas as pd
-from vtlengine.DataTypes import (
-    Integer,
-    Number,
-    unary_implicit_promotion,
-    Boolean,
-    String,
-    Duration,
-    TimeInterval,
-    TimePeriod,
-    Date,
-)

 import vtlengine.Operators as Operator
 from vtlengine.AST.Grammar.tokens import (
@@ -28,11 +17,22 @@ from vtlengine.AST.Grammar.tokens import (
     VAR_POP,
     VAR_SAMP,
 )
+from vtlengine.DataTypes import (
+    Boolean,
+    Date,
+    Duration,
+    Integer,
+    Number,
+    String,
+    TimeInterval,
+    TimePeriod,
+    unary_implicit_promotion,
+)
 from vtlengine.DataTypes.TimeHandling import (
     DURATION_MAPPING,
     DURATION_MAPPING_REVERSED,
-    TimePeriodHandler,
     TimeIntervalHandler,
+    TimePeriodHandler,
 )
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, Dataset, Role
@@ -153,7 +153,7 @@ class Aggregation(Operator.Unary):
             if comp.role == Role.ATTRIBUTE:
                 del result_components[comp_name]
         # Change Measure data type
-        for
+        for _, comp in result_components.items():
            if comp.role == Role.MEASURE:
                unary_implicit_promotion(comp.data_type, cls.type_to_check)
        if cls.return_type is not None:
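The last hunk only renames the loop variable to `_`; for context, a toy sketch of iterating component items and touching only measures (the `Comp` dataclass and role strings below are stand-ins, not the engine's `Component`/`Role`):

    from dataclasses import dataclass

    @dataclass
    class Comp:
        role: str
        data_type: str

    result_components = {
        "Id_1": Comp("IDENTIFIER", "Integer"),
        "Me_1": Comp("MEASURE", "Number"),
    }
    for _, comp in result_components.items():
        if comp.role == "MEASURE":
            print(comp.data_type)  # Number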
vtlengine/Operators/Analytic.py
CHANGED
@@ -3,8 +3,6 @@ from typing import List, Optional

 import duckdb

-from vtlengine.Exceptions import SemanticError
-
 # if os.environ.get("SPARK"):
 #     import pyspark.pandas as pd
 # else:
@@ -32,6 +30,7 @@ from vtlengine.AST.Grammar.tokens import (
     VAR_SAMP,
 )
 from vtlengine.DataTypes import COMP_NAME_MAPPING, Integer, Number, unary_implicit_promotion
+from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, Dataset, Role


@@ -60,10 +59,7 @@ class Analytic(Operator.Unary):
         params: Optional[List[int]],
         component_name: Optional[str] = None,
     ) -> Dataset:
-        if ordering is None:
-            order_components = []
-        else:
-            order_components = [o.component for o in ordering]
+        order_components = [] if ordering is None else [o.component for o in ordering]
         identifier_names = operand.get_identifiers_names()
         result_components = operand.components.copy()

@@ -178,10 +174,7 @@ class Analytic(Operator.Unary):
         window_str = f"{mode} BETWEEN {window.start} {start_mode} AND {window.stop} {stop_mode}"

         # Partitioning
-        if len(partitioning) > 0:
-            partition = "PARTITION BY " + ", ".join(partitioning)
-        else:
-            partition = ""
+        partition = "PARTITION BY " + ", ".join(partitioning) if len(partitioning) > 0 else ""

         # Ordering
         order_str = ""
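Both Analytic hunks swap if/else blocks for conditional expressions when building the window-clause fragments. A tiny standalone illustration of the partitioning line (variable names mirror the diff; the values are made up):

    partitioning = ["Id_1", "Id_2"]
    partition = "PARTITION BY " + ", ".join(partitioning) if len(partitioning) > 0 else ""
    print(partition)        # PARTITION BY Id_1, Id_2

    partitioning = []
    partition = "PARTITION BY " + ", ".join(partitioning) if len(partitioning) > 0 else ""
    print(repr(partition))  # ''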
vtlengine/Operators/Assignment.py
CHANGED
@@ -1,9 +1,8 @@
-from typing import
-
-from vtlengine.Operators import Binary
+from typing import Any, Union

 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import DataComponent, Dataset
+from vtlengine.Operators import Binary

 ALL_MODEL_TYPES = Union[DataComponent, Dataset]

vtlengine/Operators/Boolean.py
CHANGED
@@ -2,13 +2,13 @@
 #     import pyspark.pandas as pd
 # else:
 #     import pandas as pd
-import
+from typing import Any, Optional

-
+import pandas as pd

-from vtlengine.AST.Grammar.tokens import AND, OR, XOR, NOT
-from vtlengine.DataTypes import Boolean
 import vtlengine.Operators as Operator
+from vtlengine.AST.Grammar.tokens import AND, NOT, OR, XOR
+from vtlengine.DataTypes import Boolean


 class Unary(Operator.Unary):
@@ -30,9 +30,7 @@ class Binary(Operator.Binary):

     @classmethod
     def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
-        result = cls.comp_op(
-            left_series.astype("boolean"), right_series.astype("boolean")
-        )
+        result = cls.comp_op(left_series.astype("boolean"), right_series.astype("boolean"))
         return result.replace({pd.NA: None}).astype(object)

     @classmethod
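The `apply_operation_two_series` change is purely cosmetic (one line instead of three), but it rests on pandas' nullable "boolean" dtype, which applies three-valued (Kleene) logic to nulls. A minimal sketch, assuming plain pandas and a hard-coded `&` in place of the class's `comp_op`:

    import pandas as pd

    left = pd.Series([True, None, False], dtype="boolean")
    right = pd.Series([True, True, None], dtype="boolean")

    # Kleene logic: True & NA -> NA, False & NA -> False
    result = left & right
    print(result.tolist())  # [True, <NA>, False]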
vtlengine/Operators/CastOperator.py
CHANGED
@@ -1,27 +1,27 @@
 from copy import copy
-from typing import Optional,
+from typing import Any, Optional, Type, Union

-import vtlengine.Operators as Operator
 import pandas as pd
+
+import vtlengine.Operators as Operator
+from vtlengine.AST.Grammar.tokens import CAST
 from vtlengine.DataTypes import (
     COMP_NAME_MAPPING,
     EXPLICIT_WITH_MASK_TYPE_PROMOTION_MAPPING,
     EXPLICIT_WITHOUT_MASK_TYPE_PROMOTION_MAPPING,
     IMPLICIT_TYPE_PROMOTION_MAPPING,
-
-    Number,
-    TimeInterval,
+    SCALAR_TYPES_CLASS_REVERSE,
     Date,
-    TimePeriod,
     Duration,
-
+    Number,
     ScalarType,
+    String,
+    TimeInterval,
+    TimePeriod,
 )
 from vtlengine.DataTypes.TimeHandling import str_period_to_date
-
-from vtlengine.AST.Grammar.tokens import CAST
 from vtlengine.Exceptions import SemanticError
-from vtlengine.Model import Component, Dataset, Role, Scalar
+from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar

 duration_mapping = {"A": 6, "S": 5, "Q": 4, "M": 3, "W": 2, "D": 1}

@@ -286,9 +286,8 @@ class Cast(Operator.Unary):
         mask: Optional[str] = None,
     ) -> Any:

-        if mask is not None:
-            if not isinstance(mask, str):
-                raise Exception(f"{cls.op} mask must be a string")
+        if mask is not None and not isinstance(mask, str):
+            raise Exception(f"{cls.op} mask must be a string")

         if isinstance(operand, Dataset):
             return cls.dataset_validation(operand, scalarType, mask)
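The Cast hunk folds two nested checks into one guard clause. A self-contained sketch of the merged condition (a plain function rather than the engine's classmethod; the operator name is illustrative):

    def check_mask(op: str, mask=None) -> None:
        # A mask is optional, but when given it must be a string
        if mask is not None and not isinstance(mask, str):
            raise Exception(f"{op} mask must be a string")

    check_mask("cast")                # ok: no mask supplied
    check_mask("cast", "YYYY-MM-DD")  # ok: string mask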
vtlengine/Operators/Clause.py
CHANGED
@@ -1,21 +1,20 @@
-import pandas as pd
-
 from copy import copy
-from typing import List,
+from typing import List, Type, Union
+
+import pandas as pd

+from vtlengine.AST import RenameNode
+from vtlengine.AST.Grammar.tokens import AGGREGATE, CALC, DROP, KEEP, RENAME, SUBSPACE
 from vtlengine.DataTypes import (
     Boolean,
+    ScalarType,
     String,
     check_unary_implicit_promotion,
     unary_implicit_promotion,
-    ScalarType,
 )
-from vtlengine.Operators import Operator
-
-from vtlengine.AST import RenameNode
-from vtlengine.AST.Grammar.tokens import KEEP, DROP, RENAME, SUBSPACE, CALC, AGGREGATE
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
+from vtlengine.Operators import Operator


 class Calc(Operator):
@@ -162,9 +161,8 @@ class Keep(Operator):
     def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
         if len(operands) == 0:
             raise ValueError("Keep clause requires at least one operand")
-        if dataset is None:
-
-            raise ValueError("Keep clause requires at most one dataset operand")
+        if dataset is None and sum(isinstance(operand, Dataset) for operand in operands) != 1:
+            raise ValueError("Keep clause requires at most one dataset operand")
         result_dataset = cls.validate(operands, dataset)
         if dataset.data is not None:
             result_dataset.data = dataset.data[dataset.get_identifiers_names() + operands]
@@ -212,11 +210,11 @@ class Rename(Operator):
             raise SemanticError("1-3-1", alias=duplicates)

         for operand in operands:
-            if operand.old_name not in dataset.components
+            if operand.old_name not in dataset.components:
                 raise SemanticError(
                     "1-1-1-10", op=cls.op, comp_name=operand.old_name, dataset_name=dataset.name
                 )
-            if operand.new_name in dataset.components
+            if operand.new_name in dataset.components:
                 raise SemanticError(
                     "1-1-6-8", op=cls.op, comp_name=operand.new_name, dataset_name=dataset.name
                 )
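The Rename checks above verify that the source component exists and that the target name is still free. A minimal dictionary-based sketch of the same validation (plain `ValueError` instead of the engine's coded `SemanticError`):

    def validate_rename(components: dict, old_name: str, new_name: str) -> None:
        if old_name not in components:
            raise ValueError(f"Component {old_name} not found in dataset")
        if new_name in components:
            raise ValueError(f"Component {new_name} already exists in dataset")

    validate_rename({"Id_1": None, "Me_1": None}, "Me_1", "Me_2")  # passes silently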
vtlengine/Operators/Comparison.py
CHANGED
@@ -3,15 +3,13 @@ import re
 from copy import copy
 from typing import Any, Optional, Union

-from vtlengine.Exceptions import SemanticError
-from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
-
 # if os.environ.get("SPARK"):
 #     import pyspark.pandas as pd
 # else:
 #     import pandas as pd
 import pandas as pd

+import vtlengine.Operators as Operator
 from vtlengine.AST.Grammar.tokens import (
     CHARSET_MATCH,
     EQ,
@@ -24,8 +22,9 @@ from vtlengine.AST.Grammar.tokens import (
     NEQ,
     NOT_IN,
 )
-from vtlengine.DataTypes import
-
+from vtlengine.DataTypes import COMP_NAME_MAPPING, Boolean, Null, Number, String
+from vtlengine.Exceptions import SemanticError
+from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet


 class Unary(Operator.Unary):
@@ -75,11 +74,11 @@ class Binary(Operator.Binary):
     return_type = Boolean

     @classmethod
-    def _cast_values(
-
-
-
-        #
+    def _cast_values(cls,
+                     x: Optional[Union[int, float, str, bool]],
+                     y: Optional[Union[int, float, str, bool]]
+                     ) -> Any:
+        # Cast values to compatible types for comparison
         try:
             if isinstance(x, str) and isinstance(y, bool):
                 y = String.cast(y)
@@ -97,6 +96,7 @@ class Binary(Operator.Binary):

     @classmethod
     def op_func(cls, x: Any, y: Any) -> Any:
+        # Return None if any of the values are NaN
         if pd.isnull(x) or pd.isnull(y):
             return None
         x, y = cls._cast_values(x, y)
@@ -104,12 +104,29 @@ class Binary(Operator.Binary):

     @classmethod
     def apply_operation_series_scalar(cls, series: Any, scalar: Any, series_left: bool) -> Any:
-        if scalar
+        if pd.isnull(scalar):
             return pd.Series(None, index=series.index)
+
+        first_non_null = series.dropna().iloc[0] if not series.dropna().empty else None
+        if first_non_null is not None:
+            scalar, first_non_null = cls._cast_values(scalar, first_non_null)
+
+        series_type = pd.api.types.infer_dtype(series, skipna=True)
+        first_non_null_type = pd.api.types.infer_dtype([first_non_null])
+
+        if series_type != first_non_null_type:
+            if isinstance(first_non_null, str):
+                series = series.astype(str)
+            elif isinstance(first_non_null, (int, float)):
+                series = series.astype(float)
+
+        op = cls.py_op if cls.py_op is not None else cls.op_func
         if series_left:
-
+            result = series.map(lambda x: op(x, scalar), na_action="ignore")
         else:
-
+            result = series.map(lambda x: op(scalar, x), na_action="ignore")
+
+        return result

     @classmethod
     def apply_return_type_dataset(
@@ -408,10 +425,7 @@ class ExistIn(Operator.Operator):
         reference_identifiers_names = left_id_names

         # Checking if the left dataset is a subset of the right dataset
-        if is_subset_left:
-            common_columns = left_id_names
-        else:
-            common_columns = right_id_names
+        common_columns = left_id_names if is_subset_left else right_id_names

         # Check if the common identifiers are equal between the two datasets
         if dataset_1.data is not None and dataset_2.data is not None:
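The largest Comparison change is `apply_operation_series_scalar`, which now short-circuits on a null scalar and aligns the series dtype with the scalar before mapping the comparison. A hedged, self-contained approximation in plain pandas (`op` and the simplified casting rule stand in for the engine's `py_op`/`op_func` and `_cast_values`):

    import operator
    import pandas as pd

    def compare_series_scalar(series: pd.Series, scalar, op=operator.eq) -> pd.Series:
        if pd.isnull(scalar):
            # A null scalar yields null for every row
            return pd.Series(None, index=series.index)
        # Align the series dtype with the scalar so mixed-type rows do not raise
        if isinstance(scalar, str):
            series = series.astype(str)
        elif isinstance(scalar, (int, float)):
            series = series.astype(float)
        # Map the operator, leaving nulls untouched
        return series.map(lambda x: op(x, scalar), na_action="ignore")

    print(compare_series_scalar(pd.Series([1, 2, None]), 2.0).tolist())  # [False, True, nan]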
vtlengine/Operators/Conditional.py
CHANGED
@@ -1,31 +1,30 @@
 from copy import copy
-from typing import
+from typing import Any, List, Union

 import numpy as np

+# if os.environ.get("SPARK", False):
+#     import pyspark.pandas as pd
+# else:
+#     import pandas as pd
+import pandas as pd
+
 from vtlengine.DataTypes import (
-    Boolean,
     COMP_NAME_MAPPING,
-    binary_implicit_promotion,
     SCALAR_TYPES_CLASS_REVERSE,
+    Boolean,
     Null,
+    binary_implicit_promotion,
 )
-from vtlengine.Operators import Operator, Binary
-
 from vtlengine.Exceptions import SemanticError
-from vtlengine.Model import
-
-# if os.environ.get("SPARK", False):
-#     import pyspark.pandas as pd
-# else:
-#     import pandas as pd
-import pandas as pd
+from vtlengine.Model import DataComponent, Dataset, Role, Scalar
+from vtlengine.Operators import Binary, Operator


 class If(Operator):
     """
     If class:
-    `If-then-else <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=225&zoom=100,72,142>`_ operator
+    `If-then-else <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=225&zoom=100,72,142>`_ operator
     inherits from Operator, a superclass that contains general validate and evaluate class methods.
     It has the following class methods:
     Class methods:
@@ -40,7 +39,7 @@ class If(Operator):
     validate: Class method that has two branches so datacomponent and datasets can be validated. With datacomponent,
     the code reviews if it is actually a Measure and if it is a binary operation. Dataset branch reviews if the
     identifiers are the same in 'if', 'then' and 'else'.
-    """
+    """  # noqa E501

     @classmethod
     def evaluate(cls, condition: Any, true_branch: Any, false_branch: Any) -> Any:
@@ -108,7 +107,7 @@ class If(Operator):
             )
         if isinstance(result, Dataset):
             drop_columns = [
-                column for column in result.data.columns if column not in result.components
+                column for column in result.data.columns if column not in result.components
             ]
             result.data = result.data.dropna(subset=drop_columns).drop(columns=drop_columns)
         if isinstance(true_branch, Scalar) and isinstance(false_branch, Scalar):
@@ -213,14 +212,14 @@ class If(Operator):
 class Nvl(Binary):
     """
     Null class:
-    `Nvl <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=229&zoom=100,72,370>`_operator class.
+    `Nvl <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=229&zoom=100,72,370>`_operator class.
     It has the following class methods:

     Class methods:
         Validate: Class method that validates if the operation at scalar,
         datacomponent or dataset level can be performed.
         Evaluate: Evaluates the actual operation, returning the result.
-    """
+    """  # noqa E501

     @classmethod
     def evaluate(cls, left: Any, right: Any) -> Union[Scalar, DataComponent, Dataset]:
@@ -287,21 +286,15 @@ class Nvl(Binary):
         }
         for comp in result_components.values():
             comp.nullable = False
-        return Dataset(
-            name="result",
-            components=result_components,
-            data=None
-        )
+        return Dataset(name="result", components=result_components, data=None)


 class Case(Operator):

     @classmethod
-    def evaluate(
-
-
-        elseOp: Any
-    ) -> Union[Scalar, DataComponent, Dataset]:
+    def evaluate(
+        cls, conditions: List[Any], thenOps: List[Any], elseOp: Any
+    ) -> Union[Scalar, DataComponent, Dataset]:

         result = cls.validate(conditions, thenOps, elseOp)

@@ -316,19 +309,25 @@ class Case(Operator):

         for i, condition in enumerate(conditions):
             value = thenOps[i].value if isinstance(thenOps[i], Scalar) else thenOps[i].data
-            result.data = np.where(
-
+            result.data = np.where(
+                condition.data, value, result.data  # type: ignore[call-overload]
+            )

         condition_mask_else = ~np.any([condition.data for condition in conditions], axis=0)
         else_value = elseOp.value if isinstance(elseOp, Scalar) else elseOp.data
-        result.data = pd.Series(
-
+        result.data = pd.Series(
+            np.where(condition_mask_else, else_value, result.data),
+            index=conditions[0].data.index,
+        )

         if isinstance(result, Dataset):
             identifiers = result.get_identifiers_names()
             columns = [col for col in result.get_components_names() if col not in identifiers]
-            result.data = (
-
+            result.data = (
+                conditions[0].data[identifiers]
+                if conditions[0].data is not None
+                else pd.DataFrame(columns=identifiers)
+            )
@@ -336,28 +335,32 @@ class Case(Operator):
                 condition_mask = condition.data[bool_col]

                 result.data.loc[condition_mask, columns] = (
-                    thenOps[i].value
+                    thenOps[i].value
+                    if isinstance(thenOps[i], Scalar)
                     else thenOps[i].data.loc[condition_mask, columns]
                 )

-            condition_mask_else = ~np.logical_or.reduce(
-
-
-
+            condition_mask_else = ~np.logical_or.reduce(
+                [
+                    condition.data[
+                        next(x.name for x in condition.get_measures() if x.data_type == Boolean)
+                    ].astype(bool)
+                    for condition in conditions
+                ]
+            )

             result.data.loc[condition_mask_else, columns] = (
-                elseOp.value
+                elseOp.value
+                if isinstance(elseOp, Scalar)
                 else elseOp.data.loc[condition_mask_else, columns]
             )

         return result

     @classmethod
-    def validate(
-
-
-        elseOp: Any
-    ) -> Union[Scalar, DataComponent, Dataset]:
+    def validate(
+        cls, conditions: List[Any], thenOps: List[Any], elseOp: Any
+    ) -> Union[Scalar, DataComponent, Dataset]:

         if len(set(map(type, conditions))) > 1:
             raise SemanticError("2-1-9-1", op=cls.op)
@@ -424,8 +427,4 @@ class Case(Operator):
         if isinstance(op, Dataset) and op.get_components_names() != comp_names:
             raise SemanticError("2-1-9-7", op=cls.op)

-        return Dataset(
-            name="result",
-            components=components,
-            data=None
-        )
+        return Dataset(name="result", components=components, data=None)
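The Case rewrite above layers `np.where` calls per condition and then applies the else value where no condition fired. A compact, hedged sketch of that evaluation order on plain Series with scalar then/else values (the engine's Dataset and DataComponent branches are not reproduced here):

    import numpy as np
    import pandas as pd

    conditions = [pd.Series([True, False, False]), pd.Series([False, True, False])]
    then_values = [10, 20]
    else_value = -1

    # Each condition overwrites the result where it holds; rows matching no
    # condition get the else value
    result = pd.Series([None, None, None])
    for cond, value in zip(conditions, then_values):
        result = np.where(cond, value, result)

    else_mask = ~np.any([cond for cond in conditions], axis=0)
    result = pd.Series(np.where(else_mask, else_value, result), index=conditions[0].index)
    print(result.tolist())  # [10, 20, -1]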
vtlengine/Operators/General.py
CHANGED
@@ -1,11 +1,11 @@
-
+import sqlite3
+from typing import Any, Dict, List, Union

 import pandas as pd
-import sqlite3

 from vtlengine.DataTypes import COMP_NAME_MAPPING
 from vtlengine.Exceptions import SemanticError
-from vtlengine.Model import
+from vtlengine.Model import Component, DataComponent, Dataset, ExternalRoutine, Role
 from vtlengine.Operators import Binary, Unary


@@ -143,7 +143,7 @@ class Eval(Unary):
         df = cls._execute_query(
             external_routine.query, external_routine.dataset_names, empty_data_dict
         )
-        component_names =
+        component_names = df.columns.tolist()
         for comp_name in component_names:
             if comp_name not in output.components:
                 raise SemanticError(