vtlengine 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +153 -100
- vtlengine/API/__init__.py +109 -67
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +8 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/lexer.py +732 -142
- vtlengine/AST/Grammar/parser.py +2188 -826
- vtlengine/AST/Grammar/tokens.py +128 -128
- vtlengine/AST/VtlVisitor.py +7 -4
- vtlengine/AST/__init__.py +22 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +194 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +52 -27
- vtlengine/Exceptions/messages.py +134 -62
- vtlengine/Interpreter/__init__.py +781 -487
- vtlengine/Model/__init__.py +165 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +115 -59
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +149 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +89 -44
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +334 -216
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +195 -40
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +77 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +46 -37
- vtlengine-1.0.1.dist-info/METADATA +236 -0
- vtlengine-1.0.1.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
|
@@ -9,13 +9,20 @@ from vtlengine.AST.Grammar.tokens import CHECK, CHECK_HIERARCHY
|
|
|
9
9
|
from vtlengine.Exceptions import SemanticError
|
|
10
10
|
from vtlengine.Model import Component, Dataset, Role
|
|
11
11
|
|
|
12
|
+
|
|
12
13
|
# noinspection PyTypeChecker
|
|
13
14
|
class Check(Operator):
|
|
14
15
|
op = CHECK
|
|
15
16
|
|
|
16
17
|
@classmethod
|
|
17
|
-
def validate(
|
|
18
|
-
|
|
18
|
+
def validate(
|
|
19
|
+
cls,
|
|
20
|
+
validation_element: Dataset,
|
|
21
|
+
imbalance_element: Optional[Dataset],
|
|
22
|
+
error_code: Optional[str],
|
|
23
|
+
error_level: Optional[int],
|
|
24
|
+
invalid: bool,
|
|
25
|
+
) -> Dataset:
|
|
19
26
|
if len(validation_element.get_measures()) != 1:
|
|
20
27
|
raise SemanticError("1-1-10-1", op=cls.op, op_type="validation", me_type="Boolean")
|
|
21
28
|
measure = validation_element.get_measures()[0]
|
|
@@ -28,7 +35,8 @@ class Check(Operator):
|
|
|
28
35
|
imbalance_identifiers = imbalance_element.get_identifiers_names()
|
|
29
36
|
if operand_identifiers != imbalance_identifiers:
|
|
30
37
|
raise Exception(
|
|
31
|
-
"The validation and imbalance operands must have the same identifiers"
|
|
38
|
+
"The validation and imbalance operands must have the same identifiers"
|
|
39
|
+
)
|
|
32
40
|
if len(imbalance_element.get_measures()) != 1:
|
|
33
41
|
raise SemanticError("1-1-10-1", op=cls.op, op_type="imbalance", me_type="Numeric")
|
|
34
42
|
|
|
@@ -37,39 +45,54 @@ class Check(Operator):
|
|
|
37
45
|
raise SemanticError("1-1-10-1", op=cls.op, op_type="imbalance", me_type="Numeric")
|
|
38
46
|
|
|
39
47
|
# Generating the result dataset components
|
|
40
|
-
result_components = {
|
|
41
|
-
|
|
48
|
+
result_components = {
|
|
49
|
+
comp.name: comp
|
|
50
|
+
for comp in validation_element.components.values()
|
|
51
|
+
if comp.role in [Role.IDENTIFIER, Role.MEASURE]
|
|
52
|
+
}
|
|
42
53
|
if imbalance_measure is None:
|
|
43
|
-
result_components[
|
|
44
|
-
|
|
54
|
+
result_components["imbalance"] = Component(
|
|
55
|
+
name="imbalance", data_type=Number, role=Role.MEASURE, nullable=True
|
|
56
|
+
)
|
|
45
57
|
else:
|
|
46
|
-
result_components[
|
|
47
|
-
result_components[
|
|
58
|
+
result_components["imbalance"] = copy(imbalance_measure)
|
|
59
|
+
result_components["imbalance"].name = "imbalance"
|
|
48
60
|
|
|
49
|
-
result_components[
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
61
|
+
result_components["errorcode"] = Component(
|
|
62
|
+
name="errorcode", data_type=String, role=Role.MEASURE, nullable=True
|
|
63
|
+
)
|
|
64
|
+
result_components["errorlevel"] = Component(
|
|
65
|
+
name="errorlevel", data_type=Integer, role=Role.MEASURE, nullable=True
|
|
66
|
+
)
|
|
54
67
|
|
|
55
68
|
return Dataset(name="result", components=result_components, data=None)
|
|
56
69
|
|
|
57
70
|
@classmethod
|
|
58
|
-
def evaluate(
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
71
|
+
def evaluate(
|
|
72
|
+
cls,
|
|
73
|
+
validation_element: Dataset,
|
|
74
|
+
imbalance_element: Optional[Dataset],
|
|
75
|
+
error_code: Optional[str],
|
|
76
|
+
error_level: Optional[int],
|
|
77
|
+
invalid: bool,
|
|
78
|
+
) -> Dataset:
|
|
79
|
+
result = cls.validate(
|
|
80
|
+
validation_element, imbalance_element, error_code, error_level, invalid
|
|
81
|
+
)
|
|
82
|
+
if validation_element.data is None:
|
|
83
|
+
validation_element.data = pd.DataFrame()
|
|
84
|
+
columns_to_keep = (
|
|
85
|
+
validation_element.get_identifiers_names() + validation_element.get_measures_names()
|
|
86
|
+
)
|
|
64
87
|
result.data = validation_element.data.loc[:, columns_to_keep]
|
|
65
|
-
if imbalance_element is not None:
|
|
88
|
+
if imbalance_element is not None and imbalance_element.data is not None:
|
|
66
89
|
imbalance_measure_name = imbalance_element.get_measures_names()[0]
|
|
67
|
-
result.data[
|
|
90
|
+
result.data["imbalance"] = imbalance_element.data[imbalance_measure_name]
|
|
68
91
|
else:
|
|
69
|
-
result.data[
|
|
92
|
+
result.data["imbalance"] = None
|
|
70
93
|
|
|
71
|
-
result.data[
|
|
72
|
-
result.data[
|
|
94
|
+
result.data["errorcode"] = error_code
|
|
95
|
+
result.data["errorlevel"] = error_level
|
|
73
96
|
if invalid:
|
|
74
97
|
# TODO: Is this always bool_var?? In any case this does the trick for more use cases
|
|
75
98
|
validation_measure_name = validation_element.get_measures_names()[0]
|
|
@@ -85,10 +108,10 @@ class Validation(Operator):
|
|
|
85
108
|
def _generate_result_data(cls, rule_info: Dict[str, Any]) -> pd.DataFrame:
|
|
86
109
|
rule_list_df = []
|
|
87
110
|
for rule_name, rule_data in rule_info.items():
|
|
88
|
-
rule_df = rule_data[
|
|
89
|
-
rule_df[
|
|
90
|
-
rule_df[
|
|
91
|
-
rule_df[
|
|
111
|
+
rule_df = rule_data["output"]
|
|
112
|
+
rule_df["ruleid"] = rule_name
|
|
113
|
+
rule_df["errorcode"] = rule_df["bool_var"].map({False: rule_data["errorcode"]})
|
|
114
|
+
rule_df["errorlevel"] = rule_df["bool_var"].map({False: rule_data["errorlevel"]})
|
|
92
115
|
rule_list_df.append(rule_df)
|
|
93
116
|
|
|
94
117
|
if len(rule_list_df) == 1:
|
|
@@ -99,26 +122,32 @@ class Validation(Operator):
|
|
|
99
122
|
@classmethod
|
|
100
123
|
def validate(cls, dataset_element: Dataset, rule_info: Dict[str, Any], output: str) -> Dataset:
|
|
101
124
|
result_components = {comp.name: comp for comp in dataset_element.get_identifiers()}
|
|
102
|
-
result_components[
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
125
|
+
result_components["ruleid"] = Component(
|
|
126
|
+
name="ruleid", data_type=String, role=Role.IDENTIFIER, nullable=False
|
|
127
|
+
)
|
|
128
|
+
if output == "invalid":
|
|
129
|
+
result_components = {
|
|
130
|
+
**result_components,
|
|
131
|
+
**{comp.name: copy(comp) for comp in dataset_element.get_measures()},
|
|
132
|
+
}
|
|
133
|
+
elif output == "all":
|
|
134
|
+
result_components["bool_var"] = Component(
|
|
135
|
+
name="bool_var", data_type=Boolean, role=Role.MEASURE, nullable=True
|
|
136
|
+
)
|
|
112
137
|
else: # output == 'all_measures'
|
|
113
|
-
result_components = {
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
result_components[
|
|
121
|
-
|
|
138
|
+
result_components = {
|
|
139
|
+
**result_components,
|
|
140
|
+
**{comp.name: copy(comp) for comp in dataset_element.get_measures()},
|
|
141
|
+
"bool_var": Component(
|
|
142
|
+
name="bool_var", data_type=Boolean, role=Role.MEASURE, nullable=True
|
|
143
|
+
),
|
|
144
|
+
}
|
|
145
|
+
result_components["errorcode"] = Component(
|
|
146
|
+
name="errorcode", data_type=String, role=Role.MEASURE, nullable=True
|
|
147
|
+
)
|
|
148
|
+
result_components["errorlevel"] = Component(
|
|
149
|
+
name="errorlevel", data_type=Number, role=Role.MEASURE, nullable=True
|
|
150
|
+
)
|
|
122
151
|
|
|
123
152
|
return Dataset(name="result", components=result_components, data=None)
|
|
124
153
|
|
|
@@ -127,25 +156,27 @@ class Validation(Operator):
|
|
|
127
156
|
result = cls.validate(dataset_element, rule_info, output)
|
|
128
157
|
result.data = cls._generate_result_data(rule_info)
|
|
129
158
|
|
|
130
|
-
result.data = result.data.dropna(subset=result.get_identifiers_names(),
|
|
131
|
-
how="any")
|
|
159
|
+
result.data = result.data.dropna(subset=result.get_identifiers_names(), how="any")
|
|
132
160
|
result.data = result.data.drop_duplicates(
|
|
133
|
-
subset=result.get_identifiers_names() + [
|
|
134
|
-
|
|
161
|
+
subset=result.get_identifiers_names() + ["ruleid"]
|
|
162
|
+
).reset_index(drop=True)
|
|
163
|
+
validation_measures = ["bool_var", "errorcode", "errorlevel"]
|
|
135
164
|
# Only for check hierarchy
|
|
136
|
-
if
|
|
137
|
-
validation_measures.append(
|
|
138
|
-
if output ==
|
|
139
|
-
result.data = result.data[result.data[
|
|
140
|
-
result.data = result.data.drop(columns=[
|
|
165
|
+
if "imbalance" in result.components:
|
|
166
|
+
validation_measures.append("imbalance")
|
|
167
|
+
if output == "invalid":
|
|
168
|
+
result.data = result.data[result.data["bool_var"] == False]
|
|
169
|
+
result.data = result.data.drop(columns=["bool_var"])
|
|
141
170
|
result.data.reset_index(drop=True, inplace=True)
|
|
142
|
-
elif output ==
|
|
143
|
-
result.data = result.data[
|
|
144
|
-
result.get_identifiers_names() + validation_measures]
|
|
171
|
+
elif output == "all":
|
|
172
|
+
result.data = result.data[result.get_identifiers_names() + validation_measures]
|
|
145
173
|
else: # output == 'all_measures'
|
|
146
174
|
|
|
147
175
|
result.data = result.data[
|
|
148
|
-
result.get_identifiers_names()
|
|
176
|
+
result.get_identifiers_names()
|
|
177
|
+
+ dataset_element.get_measures_names()
|
|
178
|
+
+ validation_measures
|
|
179
|
+
]
|
|
149
180
|
|
|
150
181
|
result.data = result.data[result.get_components_names()]
|
|
151
182
|
return result
|
|
@@ -162,38 +193,48 @@ class Check_Hierarchy(Validation):
|
|
|
162
193
|
def _generate_result_data(cls, rule_info: Dict[str, Any]) -> pd.DataFrame:
|
|
163
194
|
df = None
|
|
164
195
|
for rule_name, rule_data in rule_info.items():
|
|
165
|
-
rule_df = rule_data[
|
|
166
|
-
rule_df[
|
|
167
|
-
rule_df[
|
|
168
|
-
rule_df[
|
|
196
|
+
rule_df = rule_data["output"]
|
|
197
|
+
rule_df["ruleid"] = rule_name
|
|
198
|
+
rule_df["errorcode"] = rule_data["errorcode"]
|
|
199
|
+
rule_df["errorlevel"] = rule_data["errorlevel"]
|
|
169
200
|
if df is None:
|
|
170
201
|
df = rule_df
|
|
171
202
|
else:
|
|
172
203
|
df = pd.concat([df, rule_df], ignore_index=True)
|
|
204
|
+
if df is None:
|
|
205
|
+
df = pd.DataFrame()
|
|
173
206
|
return df
|
|
174
207
|
|
|
175
208
|
@classmethod
|
|
176
209
|
def validate(cls, dataset_element: Dataset, rule_info: Dict[str, Any], output: str) -> Dataset:
|
|
177
210
|
result = super().validate(dataset_element, rule_info, output)
|
|
178
|
-
result.components[
|
|
179
|
-
|
|
211
|
+
result.components["imbalance"] = Component(
|
|
212
|
+
name="imbalance", data_type=Number, role=Role.MEASURE, nullable=True
|
|
213
|
+
)
|
|
180
214
|
return result
|
|
181
215
|
|
|
182
216
|
@staticmethod
|
|
183
|
-
def validate_hr_dataset(dataset: Dataset, component_name: str):
|
|
217
|
+
def validate_hr_dataset(dataset: Dataset, component_name: str) -> None:
|
|
184
218
|
if len(dataset.get_measures()) != 1:
|
|
185
|
-
raise SemanticError(
|
|
186
|
-
|
|
219
|
+
raise SemanticError(
|
|
220
|
+
"1-1-10-1", op=Check_Hierarchy.op, op_type="hierarchy", me_type="Number"
|
|
221
|
+
)
|
|
187
222
|
measure = dataset.get_measures()[0]
|
|
188
223
|
if not check_unary_implicit_promotion(measure.data_type, Number):
|
|
189
|
-
raise SemanticError(
|
|
190
|
-
|
|
224
|
+
raise SemanticError(
|
|
225
|
+
"1-1-10-1", op=Check_Hierarchy.op, op_type="hierarchy", me_type="Number"
|
|
226
|
+
)
|
|
191
227
|
if component_name not in dataset.components:
|
|
192
|
-
raise SemanticError(
|
|
193
|
-
|
|
228
|
+
raise SemanticError(
|
|
229
|
+
"1-1-1-10",
|
|
230
|
+
op=Check_Hierarchy.op,
|
|
231
|
+
comp_name=component_name,
|
|
232
|
+
dataset_name=dataset.name,
|
|
233
|
+
)
|
|
194
234
|
if dataset.components[component_name].role != Role.IDENTIFIER:
|
|
195
|
-
raise SemanticError(
|
|
196
|
-
|
|
235
|
+
raise SemanticError(
|
|
236
|
+
"1-3-20", name=component_name, role=dataset.components[component_name].role.value
|
|
237
|
+
)
|
|
197
238
|
# Remove attributes from dataset
|
|
198
239
|
if len(dataset.get_attributes()) > 0:
|
|
199
240
|
for x in dataset.get_attributes():
|