vtlengine 1.4.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vtlengine/API/_InternalApi.py +791 -0
- vtlengine/API/__init__.py +612 -0
- vtlengine/API/data/schema/external_routines_schema.json +34 -0
- vtlengine/API/data/schema/json_schema_2.1.json +116 -0
- vtlengine/API/data/schema/value_domain_schema.json +97 -0
- vtlengine/AST/ASTComment.py +57 -0
- vtlengine/AST/ASTConstructor.py +598 -0
- vtlengine/AST/ASTConstructorModules/Expr.py +1928 -0
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +995 -0
- vtlengine/AST/ASTConstructorModules/Terminals.py +790 -0
- vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- vtlengine/AST/ASTDataExchange.py +10 -0
- vtlengine/AST/ASTEncoders.py +32 -0
- vtlengine/AST/ASTString.py +675 -0
- vtlengine/AST/ASTTemplate.py +558 -0
- vtlengine/AST/ASTVisitor.py +25 -0
- vtlengine/AST/DAG/__init__.py +479 -0
- vtlengine/AST/DAG/_words.py +10 -0
- vtlengine/AST/Grammar/Vtl.g4 +705 -0
- vtlengine/AST/Grammar/VtlTokens.g4 +409 -0
- vtlengine/AST/Grammar/__init__.py +0 -0
- vtlengine/AST/Grammar/lexer.py +2139 -0
- vtlengine/AST/Grammar/parser.py +16597 -0
- vtlengine/AST/Grammar/tokens.py +169 -0
- vtlengine/AST/VtlVisitor.py +824 -0
- vtlengine/AST/__init__.py +674 -0
- vtlengine/DataTypes/TimeHandling.py +562 -0
- vtlengine/DataTypes/__init__.py +863 -0
- vtlengine/DataTypes/_time_checking.py +135 -0
- vtlengine/Exceptions/__exception_file_generator.py +96 -0
- vtlengine/Exceptions/__init__.py +159 -0
- vtlengine/Exceptions/messages.py +1004 -0
- vtlengine/Interpreter/__init__.py +2048 -0
- vtlengine/Model/__init__.py +501 -0
- vtlengine/Operators/Aggregation.py +357 -0
- vtlengine/Operators/Analytic.py +455 -0
- vtlengine/Operators/Assignment.py +23 -0
- vtlengine/Operators/Boolean.py +106 -0
- vtlengine/Operators/CastOperator.py +451 -0
- vtlengine/Operators/Clause.py +366 -0
- vtlengine/Operators/Comparison.py +488 -0
- vtlengine/Operators/Conditional.py +495 -0
- vtlengine/Operators/General.py +191 -0
- vtlengine/Operators/HROperators.py +254 -0
- vtlengine/Operators/Join.py +447 -0
- vtlengine/Operators/Numeric.py +422 -0
- vtlengine/Operators/RoleSetter.py +77 -0
- vtlengine/Operators/Set.py +176 -0
- vtlengine/Operators/String.py +578 -0
- vtlengine/Operators/Time.py +1144 -0
- vtlengine/Operators/Validation.py +275 -0
- vtlengine/Operators/__init__.py +900 -0
- vtlengine/Utils/__Virtual_Assets.py +34 -0
- vtlengine/Utils/__init__.py +479 -0
- vtlengine/__extras_check.py +17 -0
- vtlengine/__init__.py +27 -0
- vtlengine/files/__init__.py +0 -0
- vtlengine/files/output/__init__.py +35 -0
- vtlengine/files/output/_time_period_representation.py +55 -0
- vtlengine/files/parser/__init__.py +240 -0
- vtlengine/files/parser/_rfc_dialect.py +22 -0
- vtlengine/py.typed +0 -0
- vtlengine-1.4.0rc2.dist-info/METADATA +89 -0
- vtlengine-1.4.0rc2.dist-info/RECORD +66 -0
- vtlengine-1.4.0rc2.dist-info/WHEEL +4 -0
- vtlengine-1.4.0rc2.dist-info/licenses/LICENSE.md +661 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
from copy import copy
|
|
2
|
+
from typing import Any, Dict, Optional, Union
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from vtlengine.AST.Grammar.tokens import CHECK, CHECK_HIERARCHY
|
|
7
|
+
from vtlengine.DataTypes import (
|
|
8
|
+
Boolean,
|
|
9
|
+
Integer,
|
|
10
|
+
Number,
|
|
11
|
+
String,
|
|
12
|
+
check_unary_implicit_promotion,
|
|
13
|
+
)
|
|
14
|
+
from vtlengine.Exceptions import SemanticError
|
|
15
|
+
from vtlengine.Model import Component, Dataset, Role
|
|
16
|
+
from vtlengine.Operators import Operator
|
|
17
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# noinspection PyTypeChecker
class Check(Operator):
    """
    Implementation of the VTL ``check`` operator.

    Takes a boolean single-measure validation dataset (and an optional numeric
    single-measure imbalance dataset) and produces a result dataset carrying
    the original identifiers and measure plus ``imbalance``, ``errorcode`` and
    ``errorlevel`` measures.
    """

    op = CHECK

    @classmethod
    def validate(
        cls,
        validation_element: Dataset,
        imbalance_element: Optional[Dataset],
        error_code: Optional[str],
        error_level: Optional[Union[int, str]],
        invalid: bool,
    ) -> Dataset:
        """
        Structural (semantic) validation: checks the operands' shapes and
        builds the result dataset schema. No data is computed here.

        Raises:
            SemanticError: if the validation operand does not have exactly one
                Boolean measure, or the imbalance operand does not have exactly
                one Numeric measure.
            Exception: if the validation and imbalance operands have different
                identifiers.
        """
        dataset_name = VirtualCounter._new_ds_name()
        if len(validation_element.get_measures()) != 1:
            raise SemanticError("1-1-10-1", op=cls.op, op_type="validation", me_type="Boolean")
        measure = validation_element.get_measures()[0]
        if measure.data_type != Boolean:
            raise SemanticError("1-1-10-1", op=cls.op, op_type="validation", me_type="Boolean")

        # errorlevel is typed Integer when absent or integral, String otherwise.
        # (The original code had two identical String branches; collapsed here.)
        if error_level is None or isinstance(error_level, int):
            error_level_type = Integer
        else:
            error_level_type = String  # type: ignore[assignment]

        imbalance_measure = None
        if imbalance_element is not None:
            operand_identifiers = validation_element.get_identifiers_names()
            imbalance_identifiers = imbalance_element.get_identifiers_names()
            if operand_identifiers != imbalance_identifiers:
                raise Exception(
                    "The validation and imbalance operands must have the same identifiers"
                )
            if len(imbalance_element.get_measures()) != 1:
                raise SemanticError("1-1-10-1", op=cls.op, op_type="imbalance", me_type="Numeric")

            imbalance_measure = imbalance_element.get_measures()[0]
            if imbalance_measure.data_type not in (Number, Integer):
                raise SemanticError("1-1-10-1", op=cls.op, op_type="imbalance", me_type="Numeric")

        # Generating the result dataset components: keep identifiers and
        # measures of the validation operand, drop attributes.
        result_components = {
            comp.name: comp
            for comp in validation_element.components.values()
            if comp.role in [Role.IDENTIFIER, Role.MEASURE]
        }
        if imbalance_measure is None:
            result_components["imbalance"] = Component(
                name="imbalance", data_type=Number, role=Role.MEASURE, nullable=True
            )
        else:
            # Reuse the imbalance measure definition under the standard name.
            result_components["imbalance"] = copy(imbalance_measure)
            result_components["imbalance"].name = "imbalance"

        result_components["errorcode"] = Component(
            name="errorcode", data_type=String, role=Role.MEASURE, nullable=True
        )

        result_components["errorlevel"] = Component(
            name="errorlevel",
            data_type=error_level_type,  # type: ignore[arg-type]
            role=Role.MEASURE,
            nullable=True,
        )

        return Dataset(name=dataset_name, components=result_components, data=None)

    @classmethod
    def evaluate(
        cls,
        validation_element: Dataset,
        imbalance_element: Optional[Dataset],
        error_code: Optional[str],
        error_level: Optional[Union[int, str]],
        invalid: bool,
    ) -> Dataset:
        """
        Full evaluation: runs :meth:`validate` and then fills the result data,
        optionally keeping only the failing (False) rows when ``invalid``.
        """
        result = cls.validate(
            validation_element, imbalance_element, error_code, error_level, invalid
        )
        columns_to_keep = (
            validation_element.get_identifiers_names() + validation_element.get_measures_names()
        )
        if validation_element.data is None:
            # Fix: the empty frame must carry the expected columns, otherwise
            # the .loc column selection below raises KeyError on a column-less
            # DataFrame.
            validation_element.data = pd.DataFrame(columns=columns_to_keep)
        result.data = validation_element.data.loc[:, columns_to_keep]
        if imbalance_element is not None and imbalance_element.data is not None:
            imbalance_measure_name = imbalance_element.get_measures_names()[0]
            result.data["imbalance"] = imbalance_element.data[imbalance_measure_name]
        else:
            result.data["imbalance"] = None

        result.data["errorcode"] = error_code
        result.data["errorlevel"] = error_level
        if invalid:
            # TODO: Is this always bool_var?? In any case this does the trick for more use cases
            validation_measure_name = validation_element.get_measures_names()[0]
            # '== False' deliberately keeps only explicit False rows
            # (nulls/NaN compare as not-False and are excluded).
            result.data = result.data[result.data[validation_measure_name] == False]
            result.data.reset_index(drop=True, inplace=True)
        return result
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# noinspection PyTypeChecker
class Validation(Operator):
    """
    Base implementation for datapoint ruleset validation.

    Builds, per rule, a result dataset tagged with ``ruleid``, ``errorcode``
    and ``errorlevel`` measures, filtered/projected according to the requested
    ``output`` mode (``invalid`` / ``all`` / ``all_measures``).
    """

    @classmethod
    def _generate_result_data(cls, rule_info: Dict[str, Any]) -> pd.DataFrame:
        """
        Concatenate the per-rule output frames, tagging each with its rule id
        and with errorcode/errorlevel on the failing rows only.
        """
        rule_list_df = []
        for rule_name, rule_data in rule_info.items():
            rule_df = rule_data["output"]
            rule_df["ruleid"] = rule_name
            # map({False: x}) assigns x only where bool_var is False and NaN
            # everywhere else, so passing rows carry no error info.
            rule_df["errorcode"] = rule_df["bool_var"].map({False: rule_data["errorcode"]})
            rule_df["errorlevel"] = rule_df["bool_var"].map({False: rule_data["errorlevel"]})
            rule_list_df.append(rule_df)

        if len(rule_list_df) == 1:
            return rule_list_df[0]
        df = pd.concat(rule_list_df, ignore_index=True, copy=False)
        return df

    @classmethod
    def validate(cls, dataset_element: Dataset, rule_info: Dict[str, Any], output: str) -> Dataset:
        """
        Build the result dataset schema for the requested ``output`` mode.

        The ``errorlevel`` measure is typed Number when no rule declares a
        non-null error level or all declared levels are integers, and String
        otherwise.
        """
        # Direct indexing is safe here: the guard checks key presence.
        error_levels = [
            rule_data["errorlevel"]
            for rule_data in rule_info.values()
            if "errorlevel" in rule_data
        ]
        non_null_levels = [el for el in error_levels if el is not None]

        # (The original code had two identical String branches; collapsed.)
        if len(non_null_levels) == 0 or all(isinstance(el, int) for el in non_null_levels):
            error_level_type = Number
        else:
            error_level_type = String  # type: ignore[assignment]
        dataset_name = VirtualCounter._new_ds_name()
        result_components = {comp.name: comp for comp in dataset_element.get_identifiers()}
        # ruleid is an extra identifier: one result row per (datapoint, rule).
        result_components["ruleid"] = Component(
            name="ruleid", data_type=String, role=Role.IDENTIFIER, nullable=False
        )
        if output == "invalid":
            result_components = {
                **result_components,
                **{comp.name: copy(comp) for comp in dataset_element.get_measures()},
            }
        elif output == "all":
            result_components["bool_var"] = Component(
                name="bool_var", data_type=Boolean, role=Role.MEASURE, nullable=True
            )
        else:  # output == 'all_measures'
            result_components = {
                **result_components,
                **{comp.name: copy(comp) for comp in dataset_element.get_measures()},
                "bool_var": Component(
                    name="bool_var", data_type=Boolean, role=Role.MEASURE, nullable=True
                ),
            }
        result_components["errorcode"] = Component(
            name="errorcode", data_type=String, role=Role.MEASURE, nullable=True
        )
        result_components["errorlevel"] = Component(
            name="errorlevel",
            data_type=error_level_type,  # type: ignore[arg-type]
            role=Role.MEASURE,
            nullable=True,
        )

        return Dataset(name=dataset_name, components=result_components, data=None)

    @classmethod
    def evaluate(cls, dataset_element: Dataset, rule_info: Dict[str, Any], output: str) -> Dataset:
        """
        Run the structural validation, then compute, de-duplicate and project
        the result data according to the ``output`` mode.
        """
        result = cls.validate(dataset_element, rule_info, output)
        result.data = cls._generate_result_data(rule_info)

        result.data = result.data.dropna(subset=result.get_identifiers_names(), how="any")
        result.data = result.data.drop_duplicates(
            subset=result.get_identifiers_names() + ["ruleid"]
        ).reset_index(drop=True)
        validation_measures = ["bool_var", "errorcode", "errorlevel"]
        # Only for check hierarchy
        if "imbalance" in result.components:
            validation_measures.append("imbalance")
        if output == "invalid":
            # '== False' deliberately keeps only explicit False rows
            # (nulls/NaN are excluded).
            result.data = result.data[result.data["bool_var"] == False]
            result.data = result.data.drop(columns=["bool_var"])
            result.data.reset_index(drop=True, inplace=True)
        elif output == "all":
            result.data = result.data[result.get_identifiers_names() + validation_measures]
        else:  # output == 'all_measures'
            result.data = result.data[
                result.get_identifiers_names()
                + dataset_element.get_measures_names()
                + validation_measures
            ]

        result.data = result.data[result.get_components_names()]
        return result
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
class Check_Datapoint(Validation):
    """Datapoint ruleset check; inherits the Validation behaviour unchanged."""

    pass
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class Check_Hierarchy(Validation):
    """
    Hierarchy check: extends :class:`Validation` with an ``imbalance`` measure
    and unconditional errorcode/errorlevel tagging per rule.
    """

    op = CHECK_HIERARCHY

    @classmethod
    def _generate_result_data(cls, rule_info: Dict[str, Any]) -> pd.DataFrame:
        """
        Concatenate the per-rule output frames. Unlike datapoint rules,
        hierarchy rules tag errorcode/errorlevel on every row, not only on
        failing ones.
        """
        frames = []
        for rule_name, rule_data in rule_info.items():
            rule_df = rule_data["output"]
            rule_df["ruleid"] = rule_name
            rule_df["errorcode"] = rule_data["errorcode"]
            rule_df["errorlevel"] = rule_data["errorlevel"]
            frames.append(rule_df)
        if not frames:
            return pd.DataFrame()
        # Single concat instead of one concat per loop iteration (the original
        # was quadratic); the original's dead `if df is None` branch — df was
        # always a DataFrame — has been removed.
        return pd.concat(frames, ignore_index=True)

    @classmethod
    def validate(cls, dataset_element: Dataset, rule_info: Dict[str, Any], output: str) -> Dataset:
        """Build the base schema and add the ``imbalance`` measure."""
        result = super().validate(dataset_element, rule_info, output)
        result.components["imbalance"] = Component(
            name="imbalance", data_type=Number, role=Role.MEASURE, nullable=True
        )
        return result

    @staticmethod
    def validate_hr_dataset(dataset: Dataset, component_name: str) -> None:
        """
        Check that ``dataset`` is a valid hierarchy operand: exactly one
        numeric-promotable measure and ``component_name`` present as an
        identifier. Also strips any attribute components from ``dataset``
        in place.

        Raises:
            SemanticError: on measure-count/type, missing component, or wrong
                component role.
        """
        if len(dataset.get_measures()) != 1:
            raise SemanticError(
                "1-1-10-1", op=Check_Hierarchy.op, op_type="hierarchy", me_type="Number"
            )
        measure = dataset.get_measures()[0]
        if not check_unary_implicit_promotion(measure.data_type, Number):
            raise SemanticError(
                "1-1-10-1", op=Check_Hierarchy.op, op_type="hierarchy", me_type="Number"
            )
        if component_name not in dataset.components:
            raise SemanticError(
                "1-1-1-10",
                op=Check_Hierarchy.op,
                comp_name=component_name,
                dataset_name=dataset.name,
            )
        if dataset.components[component_name].role != Role.IDENTIFIER:
            raise SemanticError(
                "1-2-7",
                name=component_name,
                role=dataset.components[component_name].role.value,
            )
        # Remove attributes from dataset
        if len(dataset.get_attributes()) > 0:
            for x in dataset.get_attributes():
                dataset.delete_component(x.name)
|