vtlengine 1.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +159 -102
- vtlengine/API/__init__.py +110 -68
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +402 -205
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +248 -104
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +24 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/Vtl.g4 +49 -20
- vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
- vtlengine/AST/Grammar/lexer.py +2012 -1312
- vtlengine/AST/Grammar/parser.py +7524 -4343
- vtlengine/AST/Grammar/tokens.py +140 -128
- vtlengine/AST/VtlVisitor.py +16 -5
- vtlengine/AST/__init__.py +41 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +196 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +96 -27
- vtlengine/Exceptions/messages.py +149 -69
- vtlengine/Interpreter/__init__.py +817 -497
- vtlengine/Model/__init__.py +172 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +167 -79
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +290 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +129 -46
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +467 -215
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +232 -41
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +79 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +48 -37
- vtlengine-1.0.2.dist-info/METADATA +245 -0
- vtlengine-1.0.2.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/LICENSE.md +0 -0
vtlengine/Operators/General.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Dict, List
|
|
1
|
+
from typing import Dict, List, Any, Union
|
|
2
2
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
import sqlite3
|
|
@@ -20,38 +20,47 @@ class Membership(Binary):
|
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
@classmethod
|
|
23
|
-
def validate(cls, left_operand:
|
|
23
|
+
def validate(cls, left_operand: Any, right_operand: Any) -> Dataset:
|
|
24
24
|
if right_operand not in left_operand.components:
|
|
25
|
-
raise SemanticError(
|
|
26
|
-
|
|
25
|
+
raise SemanticError(
|
|
26
|
+
"1-1-1-10", op=cls.op, comp_name=right_operand, dataset_name=left_operand.name
|
|
27
|
+
)
|
|
27
28
|
|
|
28
29
|
component = left_operand.components[right_operand]
|
|
29
30
|
if component.role in (Role.IDENTIFIER, Role.ATTRIBUTE):
|
|
30
31
|
right_operand = COMP_NAME_MAPPING[component.data_type]
|
|
31
|
-
left_operand.components[right_operand] = Component(
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
left_operand.components[right_operand] = Component(
|
|
33
|
+
name=right_operand,
|
|
34
|
+
data_type=component.data_type,
|
|
35
|
+
role=Role.MEASURE,
|
|
36
|
+
nullable=component.nullable,
|
|
37
|
+
)
|
|
35
38
|
if left_operand.data is not None:
|
|
36
39
|
left_operand.data[right_operand] = left_operand.data[component.name]
|
|
37
40
|
left_operand.data[right_operand] = left_operand.data[component.name]
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
+
result_components = {
|
|
42
|
+
name: comp
|
|
43
|
+
for name, comp in left_operand.components.items()
|
|
44
|
+
if comp.role == Role.IDENTIFIER or comp.name == right_operand
|
|
45
|
+
}
|
|
41
46
|
result_dataset = Dataset(name="result", components=result_components, data=None)
|
|
42
47
|
return result_dataset
|
|
43
48
|
|
|
44
49
|
@classmethod
|
|
45
|
-
def evaluate(
|
|
46
|
-
|
|
50
|
+
def evaluate(
|
|
51
|
+
cls, left_operand: Dataset, right_operand: str, is_from_component_assignment: bool = False
|
|
52
|
+
) -> Union[DataComponent, Dataset]:
|
|
47
53
|
result_dataset = cls.validate(left_operand, right_operand)
|
|
48
|
-
if
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
54
|
+
if left_operand.data is not None:
|
|
55
|
+
if is_from_component_assignment:
|
|
56
|
+
return DataComponent(
|
|
57
|
+
name=right_operand,
|
|
58
|
+
data_type=left_operand.components[right_operand].data_type,
|
|
59
|
+
role=Role.MEASURE,
|
|
60
|
+
nullable=left_operand.components[right_operand].nullable,
|
|
61
|
+
data=left_operand.data[right_operand],
|
|
62
|
+
)
|
|
63
|
+
result_dataset.data = left_operand.data[list(result_dataset.components.keys())]
|
|
55
64
|
return result_dataset
|
|
56
65
|
|
|
57
66
|
|
|
@@ -60,19 +69,19 @@ class Alias(Binary):
|
|
|
60
69
|
It inherits from Binary class, and has the following class methods:
|
|
61
70
|
|
|
62
71
|
Class methods:
|
|
63
|
-
Validate: Ensures the name given in the right operand is different from the
|
|
64
|
-
Evaluate: Checks if the data between both operators are the same.
|
|
72
|
+
Validate: Ensures the name given in the right operand is different from the
|
|
73
|
+
name of the Dataset. Evaluate: Checks if the data between both operators are the same.
|
|
65
74
|
"""
|
|
66
75
|
|
|
67
76
|
@classmethod
|
|
68
|
-
def validate(cls, left_operand: Dataset, right_operand: str):
|
|
77
|
+
def validate(cls, left_operand: Dataset, right_operand: Union[str, Dataset]) -> Dataset:
|
|
69
78
|
new_name = right_operand if isinstance(right_operand, str) else right_operand.name
|
|
70
79
|
if new_name != left_operand.name and new_name in left_operand.get_components_names():
|
|
71
80
|
raise SemanticError("1-3-1", alias=new_name)
|
|
72
81
|
return Dataset(name=new_name, components=left_operand.components, data=None)
|
|
73
82
|
|
|
74
83
|
@classmethod
|
|
75
|
-
def evaluate(cls, left_operand: Dataset, right_operand: str) -> Dataset:
|
|
84
|
+
def evaluate(cls, left_operand: Dataset, right_operand: Union[str, Dataset]) -> Dataset:
|
|
76
85
|
result = cls.validate(left_operand, right_operand)
|
|
77
86
|
result.data = left_operand.data
|
|
78
87
|
return result
|
|
@@ -83,14 +92,16 @@ class Eval(Unary):
|
|
|
83
92
|
It inherits from Unary class and has the following class methods
|
|
84
93
|
|
|
85
94
|
Class methods:
|
|
86
|
-
Validate: checks if the external routine name is the same as the operand name,
|
|
95
|
+
Validate: checks if the external routine name is the same as the operand name,
|
|
96
|
+
which must be a Dataset.
|
|
87
97
|
Evaluate: Checks if the operand and the output is actually a Dataset.
|
|
88
98
|
|
|
89
99
|
"""
|
|
90
100
|
|
|
91
101
|
@staticmethod
|
|
92
|
-
def _execute_query(
|
|
93
|
-
|
|
102
|
+
def _execute_query(
|
|
103
|
+
query: str, dataset_names: List[str], data: Dict[str, pd.DataFrame]
|
|
104
|
+
) -> pd.DataFrame:
|
|
94
105
|
try:
|
|
95
106
|
conn = sqlite3.connect(":memory:")
|
|
96
107
|
try:
|
|
@@ -111,25 +122,33 @@ class Eval(Unary):
|
|
|
111
122
|
return df_result
|
|
112
123
|
|
|
113
124
|
@classmethod
|
|
114
|
-
def validate(
|
|
115
|
-
|
|
125
|
+
def validate( # type: ignore[override]
|
|
126
|
+
cls,
|
|
127
|
+
operands: Dict[str, Dataset],
|
|
128
|
+
external_routine: ExternalRoutine,
|
|
129
|
+
output: Dataset,
|
|
130
|
+
) -> Dataset:
|
|
116
131
|
|
|
117
132
|
empty_data_dict = {}
|
|
118
133
|
for ds_name in external_routine.dataset_names:
|
|
119
134
|
if ds_name not in operands:
|
|
120
|
-
raise ValueError(
|
|
121
|
-
|
|
135
|
+
raise ValueError(
|
|
136
|
+
f"External Routine dataset {ds_name} " f"is not present in Eval operands"
|
|
137
|
+
)
|
|
122
138
|
empty_data = pd.DataFrame(
|
|
123
|
-
columns=[comp.name for comp in operands[ds_name].components.values()]
|
|
139
|
+
columns=[comp.name for comp in operands[ds_name].components.values()]
|
|
140
|
+
)
|
|
124
141
|
empty_data_dict[ds_name] = empty_data
|
|
125
142
|
|
|
126
|
-
df = cls._execute_query(
|
|
127
|
-
|
|
143
|
+
df = cls._execute_query(
|
|
144
|
+
external_routine.query, external_routine.dataset_names, empty_data_dict
|
|
145
|
+
)
|
|
128
146
|
component_names = [name for name in df.columns]
|
|
129
147
|
for comp_name in component_names:
|
|
130
148
|
if comp_name not in output.components:
|
|
131
|
-
raise SemanticError(
|
|
132
|
-
|
|
149
|
+
raise SemanticError(
|
|
150
|
+
"1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=df.name
|
|
151
|
+
)
|
|
133
152
|
|
|
134
153
|
for comp_name in output.components:
|
|
135
154
|
if comp_name not in component_names:
|
|
@@ -140,15 +159,17 @@ class Eval(Unary):
|
|
|
140
159
|
return output
|
|
141
160
|
|
|
142
161
|
@classmethod
|
|
143
|
-
def evaluate(
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
162
|
+
def evaluate( # type: ignore[override]
|
|
163
|
+
cls,
|
|
164
|
+
operands: Dict[str, Dataset],
|
|
165
|
+
external_routine: ExternalRoutine,
|
|
166
|
+
output: Dataset,
|
|
167
|
+
) -> Dataset:
|
|
168
|
+
result: Dataset = cls.validate(operands, external_routine, output)
|
|
169
|
+
operands_data_dict = {ds_name: operands[ds_name].data for ds_name in operands}
|
|
170
|
+
result.data = cls._execute_query(
|
|
171
|
+
external_routine.query,
|
|
172
|
+
external_routine.dataset_names,
|
|
173
|
+
operands_data_dict, # type: ignore[arg-type]
|
|
174
|
+
)
|
|
154
175
|
return result
|
|
@@ -17,38 +17,39 @@ def get_measure_from_dataset(dataset: Dataset, code_item: str) -> DataComponent:
|
|
|
17
17
|
data = None
|
|
18
18
|
else:
|
|
19
19
|
data = dataset.data[measure_name]
|
|
20
|
-
return DataComponent(
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
20
|
+
return DataComponent(
|
|
21
|
+
name=code_item,
|
|
22
|
+
data=data,
|
|
23
|
+
data_type=dataset.components[measure_name].data_type,
|
|
24
|
+
role=dataset.components[measure_name].role,
|
|
25
|
+
nullable=dataset.components[measure_name].nullable,
|
|
26
|
+
)
|
|
24
27
|
|
|
25
28
|
|
|
26
29
|
class HRComparison(Operators.Binary):
|
|
27
30
|
|
|
28
31
|
@classmethod
|
|
29
|
-
def imbalance_func(cls, x, y):
|
|
32
|
+
def imbalance_func(cls, x: Any, y: Any) -> Any:
|
|
30
33
|
if pd.isnull(x) or pd.isnull(y):
|
|
31
34
|
return None
|
|
32
35
|
return x - y
|
|
33
36
|
|
|
34
37
|
@staticmethod
|
|
35
|
-
def hr_func(x, y, hr_mode, func):
|
|
38
|
+
def hr_func(x: Any, y: Any, hr_mode: str, func: Any) -> Any:
|
|
36
39
|
# In comments, it is specified the condition for evaluating the rule,
|
|
37
40
|
# so we delete the cases that does not satisfy the condition
|
|
38
41
|
# (line 6509 of the reference manual)
|
|
39
|
-
if
|
|
40
|
-
|
|
41
|
-
y == "REMOVE_VALUE"):
|
|
42
|
-
if hr_mode == 'partial_null' and pd.isnull(x):
|
|
42
|
+
if hr_mode in ("partial_null", "partial_zero") and not pd.isnull(y) and y == "REMOVE_VALUE":
|
|
43
|
+
if hr_mode == "partial_null" and pd.isnull(x):
|
|
43
44
|
return "REMOVE_VALUE"
|
|
44
|
-
elif hr_mode ==
|
|
45
|
+
elif hr_mode == "partial_zero" and not pd.isnull(x) and x == 0:
|
|
45
46
|
return "REMOVE_VALUE"
|
|
46
47
|
return None
|
|
47
|
-
if hr_mode ==
|
|
48
|
+
if hr_mode == "non_null":
|
|
48
49
|
# If all the involved Data Points are not NULL
|
|
49
50
|
if pd.isnull(x) or pd.isnull(y):
|
|
50
51
|
return "REMOVE_VALUE"
|
|
51
|
-
elif hr_mode ==
|
|
52
|
+
elif hr_mode == "non_zero":
|
|
52
53
|
# If at least one of the involved Data Points is <> zero
|
|
53
54
|
if not (pd.isnull(x) and pd.isnull(y)) and (x == 0 and y == 0):
|
|
54
55
|
return "REMOVE_VALUE"
|
|
@@ -56,64 +57,72 @@ class HRComparison(Operators.Binary):
|
|
|
56
57
|
return func(x, y)
|
|
57
58
|
|
|
58
59
|
@classmethod
|
|
59
|
-
def apply_hr_func(cls, left_series, right_series, hr_mode, func):
|
|
60
|
+
def apply_hr_func(cls, left_series: Any, right_series: Any, hr_mode: str, func: Any) -> Any:
|
|
60
61
|
return left_series.combine(right_series, lambda x, y: cls.hr_func(x, y, hr_mode, func))
|
|
61
62
|
|
|
62
63
|
@classmethod
|
|
63
64
|
def validate(cls, left_operand: Dataset, right_operand: DataComponent, hr_mode: str) -> Dataset:
|
|
64
|
-
result_components = {
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
65
|
+
result_components = {
|
|
66
|
+
comp_name: copy(comp)
|
|
67
|
+
for comp_name, comp in left_operand.components.items()
|
|
68
|
+
if comp.role == Role.IDENTIFIER
|
|
69
|
+
}
|
|
70
|
+
result_components["bool_var"] = Component(
|
|
71
|
+
name="bool_var", data_type=Boolean, role=Role.MEASURE, nullable=True
|
|
72
|
+
)
|
|
73
|
+
result_components["imbalance"] = Component(
|
|
74
|
+
name="imbalance", data_type=Number, role=Role.MEASURE, nullable=True
|
|
75
|
+
)
|
|
76
|
+
return Dataset(
|
|
77
|
+
name=f"{left_operand.name}{cls.op}{right_operand.name}",
|
|
78
|
+
components=result_components,
|
|
79
|
+
data=None,
|
|
80
|
+
)
|
|
77
81
|
|
|
78
82
|
@classmethod
|
|
79
|
-
def evaluate(
|
|
83
|
+
def evaluate( # type: ignore[override]
|
|
84
|
+
cls, left: Dataset, right: DataComponent, hr_mode: str
|
|
85
|
+
) -> Dataset:
|
|
80
86
|
result = cls.validate(left, right, hr_mode)
|
|
81
|
-
result.data = left.data.copy()
|
|
87
|
+
result.data = left.data.copy() if left.data is not None else pd.DataFrame()
|
|
82
88
|
measure_name = left.get_measures_names()[0]
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
89
|
+
if left.data is not None and right.data is not None:
|
|
90
|
+
result.data["bool_var"] = cls.apply_hr_func(
|
|
91
|
+
left.data[measure_name], right.data, hr_mode, cls.op_func
|
|
92
|
+
)
|
|
93
|
+
result.data["imbalance"] = cls.apply_hr_func(
|
|
94
|
+
left.data[measure_name], right.data, hr_mode, cls.imbalance_func
|
|
95
|
+
)
|
|
87
96
|
# Removing datapoints that should not be returned
|
|
88
97
|
# (we do it below imbalance calculation
|
|
89
98
|
# to avoid errors on different shape)
|
|
90
|
-
result.data = result.data[result.data[
|
|
99
|
+
result.data = result.data[result.data["bool_var"] != "REMOVE_VALUE"]
|
|
91
100
|
result.data.drop(measure_name, axis=1, inplace=True)
|
|
92
101
|
return result
|
|
93
102
|
|
|
94
103
|
|
|
95
104
|
class HREqual(HRComparison):
|
|
96
|
-
op =
|
|
105
|
+
op = "="
|
|
97
106
|
py_op = operator.eq
|
|
98
107
|
|
|
99
108
|
|
|
100
109
|
class HRGreater(HRComparison):
|
|
101
|
-
op =
|
|
110
|
+
op = ">"
|
|
102
111
|
py_op = operator.gt
|
|
103
112
|
|
|
104
113
|
|
|
105
114
|
class HRGreaterEqual(HRComparison):
|
|
106
|
-
op =
|
|
115
|
+
op = ">="
|
|
107
116
|
py_op = operator.ge
|
|
108
117
|
|
|
109
118
|
|
|
110
119
|
class HRLess(HRComparison):
|
|
111
|
-
op =
|
|
120
|
+
op = "<"
|
|
112
121
|
py_op = operator.lt
|
|
113
122
|
|
|
114
123
|
|
|
115
124
|
class HRLessEqual(HRComparison):
|
|
116
|
-
op =
|
|
125
|
+
op = "<="
|
|
117
126
|
py_op = operator.le
|
|
118
127
|
|
|
119
128
|
|
|
@@ -128,38 +137,46 @@ class HRBinNumeric(Operators.Binary):
|
|
|
128
137
|
@classmethod
|
|
129
138
|
def evaluate(cls, left: DataComponent, right: DataComponent) -> DataComponent:
|
|
130
139
|
result_data = cls.apply_operation_two_series(left.data, right.data)
|
|
131
|
-
return DataComponent(
|
|
132
|
-
|
|
133
|
-
|
|
140
|
+
return DataComponent(
|
|
141
|
+
name=f"{left.name}{cls.op}{right.name}",
|
|
142
|
+
data=result_data,
|
|
143
|
+
data_type=left.data_type,
|
|
144
|
+
role=left.role,
|
|
145
|
+
nullable=left.nullable,
|
|
146
|
+
)
|
|
134
147
|
|
|
135
148
|
|
|
136
149
|
class HRBinPlus(HRBinNumeric):
|
|
137
|
-
op =
|
|
150
|
+
op = "+"
|
|
138
151
|
py_op = operator.add
|
|
139
152
|
|
|
140
153
|
|
|
141
154
|
class HRBinMinus(HRBinNumeric):
|
|
142
|
-
op =
|
|
155
|
+
op = "-"
|
|
143
156
|
py_op = operator.sub
|
|
144
157
|
|
|
145
158
|
|
|
146
159
|
class HRUnNumeric(Operators.Unary):
|
|
147
160
|
|
|
148
161
|
@classmethod
|
|
149
|
-
def evaluate(cls, operand: DataComponent):
|
|
162
|
+
def evaluate(cls, operand: DataComponent) -> DataComponent: # type: ignore[override]
|
|
150
163
|
result_data = cls.apply_operation_component(operand.data)
|
|
151
|
-
return DataComponent(
|
|
152
|
-
|
|
153
|
-
|
|
164
|
+
return DataComponent(
|
|
165
|
+
name=f"{cls.op}({operand.name})",
|
|
166
|
+
data=result_data,
|
|
167
|
+
data_type=operand.data_type,
|
|
168
|
+
role=operand.role,
|
|
169
|
+
nullable=operand.nullable,
|
|
170
|
+
)
|
|
154
171
|
|
|
155
172
|
|
|
156
173
|
class HRUnPlus(HRUnNumeric):
|
|
157
|
-
op =
|
|
174
|
+
op = "+"
|
|
158
175
|
py_op = operator.pos
|
|
159
176
|
|
|
160
177
|
|
|
161
178
|
class HRUnMinus(HRUnNumeric):
|
|
162
|
-
op =
|
|
179
|
+
op = "-"
|
|
163
180
|
py_op = operator.neg
|
|
164
181
|
|
|
165
182
|
|
|
@@ -167,28 +184,28 @@ class HAAssignment(Operators.Binary):
|
|
|
167
184
|
|
|
168
185
|
@classmethod
|
|
169
186
|
def validate(cls, left: Dataset, right: DataComponent, hr_mode: str) -> Dataset:
|
|
170
|
-
result_components = {comp_name: copy(comp) for comp_name, comp in
|
|
171
|
-
|
|
172
|
-
return Dataset(name=f"{left.name}",
|
|
173
|
-
components=result_components,
|
|
174
|
-
data=None)
|
|
187
|
+
result_components = {comp_name: copy(comp) for comp_name, comp in left.components.items()}
|
|
188
|
+
return Dataset(name=f"{left.name}", components=result_components, data=None)
|
|
175
189
|
|
|
176
190
|
@classmethod
|
|
177
|
-
def evaluate(
|
|
191
|
+
def evaluate( # type: ignore[override]
|
|
192
|
+
cls, left: Dataset, right: DataComponent, hr_mode: str
|
|
193
|
+
) -> Dataset:
|
|
178
194
|
result = cls.validate(left, right, hr_mode)
|
|
179
195
|
measure_name = left.get_measures_names()[0]
|
|
180
|
-
result.data = left.data.copy()
|
|
181
|
-
|
|
196
|
+
result.data = left.data.copy() if left.data is not None else pd.DataFrame()
|
|
197
|
+
if right.data is not None:
|
|
198
|
+
result.data[measure_name] = right.data.map(lambda x: cls.handle_mode(x, hr_mode))
|
|
182
199
|
result.data = result.data[result.data[measure_name] != "REMOVE_VALUE"]
|
|
183
200
|
return result
|
|
184
201
|
|
|
185
202
|
@classmethod
|
|
186
|
-
def handle_mode(cls, x, hr_mode):
|
|
203
|
+
def handle_mode(cls, x: Any, hr_mode: str) -> Any:
|
|
187
204
|
if not pd.isnull(x) and x == "REMOVE_VALUE":
|
|
188
205
|
return "REMOVE_VALUE"
|
|
189
|
-
if hr_mode ==
|
|
206
|
+
if hr_mode == "non_null" and pd.isnull(x):
|
|
190
207
|
return "REMOVE_VALUE"
|
|
191
|
-
elif hr_mode ==
|
|
208
|
+
elif hr_mode == "non_zero" and x == 0:
|
|
192
209
|
return "REMOVE_VALUE"
|
|
193
210
|
return x
|
|
194
211
|
|
|
@@ -204,17 +221,18 @@ class Hierarchy(Operators.Operator):
|
|
|
204
221
|
return df
|
|
205
222
|
|
|
206
223
|
@classmethod
|
|
207
|
-
def validate(
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
224
|
+
def validate(
|
|
225
|
+
cls, dataset: Dataset, computed_dict: Dict[str, DataFrame], output: str
|
|
226
|
+
) -> Dataset:
|
|
227
|
+
result_components = {
|
|
228
|
+
comp_name: copy(comp) for comp_name, comp in dataset.components.items()
|
|
229
|
+
}
|
|
230
|
+
return Dataset(name=dataset.name, components=result_components, data=None)
|
|
214
231
|
|
|
215
232
|
@classmethod
|
|
216
|
-
def evaluate(
|
|
217
|
-
|
|
233
|
+
def evaluate(
|
|
234
|
+
cls, dataset: Dataset, computed_dict: Dict[str, DataFrame], output: str
|
|
235
|
+
) -> Dataset:
|
|
218
236
|
result = cls.validate(dataset, computed_dict, output)
|
|
219
237
|
if len(computed_dict) == 0:
|
|
220
238
|
computed_data = pd.DataFrame(columns=dataset.get_components_names())
|
|
@@ -227,7 +245,8 @@ class Hierarchy(Operators.Operator):
|
|
|
227
245
|
# union(setdiff(op, R), R) where R is the computed data.
|
|
228
246
|
# It is the same as union(op, R) and drop duplicates, selecting the last one available
|
|
229
247
|
result.data = pd.concat([dataset.data, computed_data], axis=0, ignore_index=True)
|
|
230
|
-
result.data.drop_duplicates(
|
|
231
|
-
|
|
248
|
+
result.data.drop_duplicates(
|
|
249
|
+
subset=dataset.get_identifiers_names(), keep="last", inplace=True
|
|
250
|
+
)
|
|
232
251
|
result.data.reset_index(drop=True, inplace=True)
|
|
233
252
|
return result
|