vtlengine 1.4.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. vtlengine/API/_InternalApi.py +791 -0
  2. vtlengine/API/__init__.py +612 -0
  3. vtlengine/API/data/schema/external_routines_schema.json +34 -0
  4. vtlengine/API/data/schema/json_schema_2.1.json +116 -0
  5. vtlengine/API/data/schema/value_domain_schema.json +97 -0
  6. vtlengine/AST/ASTComment.py +57 -0
  7. vtlengine/AST/ASTConstructor.py +598 -0
  8. vtlengine/AST/ASTConstructorModules/Expr.py +1928 -0
  9. vtlengine/AST/ASTConstructorModules/ExprComponents.py +995 -0
  10. vtlengine/AST/ASTConstructorModules/Terminals.py +790 -0
  11. vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
  12. vtlengine/AST/ASTDataExchange.py +10 -0
  13. vtlengine/AST/ASTEncoders.py +32 -0
  14. vtlengine/AST/ASTString.py +675 -0
  15. vtlengine/AST/ASTTemplate.py +558 -0
  16. vtlengine/AST/ASTVisitor.py +25 -0
  17. vtlengine/AST/DAG/__init__.py +479 -0
  18. vtlengine/AST/DAG/_words.py +10 -0
  19. vtlengine/AST/Grammar/Vtl.g4 +705 -0
  20. vtlengine/AST/Grammar/VtlTokens.g4 +409 -0
  21. vtlengine/AST/Grammar/__init__.py +0 -0
  22. vtlengine/AST/Grammar/lexer.py +2139 -0
  23. vtlengine/AST/Grammar/parser.py +16597 -0
  24. vtlengine/AST/Grammar/tokens.py +169 -0
  25. vtlengine/AST/VtlVisitor.py +824 -0
  26. vtlengine/AST/__init__.py +674 -0
  27. vtlengine/DataTypes/TimeHandling.py +562 -0
  28. vtlengine/DataTypes/__init__.py +863 -0
  29. vtlengine/DataTypes/_time_checking.py +135 -0
  30. vtlengine/Exceptions/__exception_file_generator.py +96 -0
  31. vtlengine/Exceptions/__init__.py +159 -0
  32. vtlengine/Exceptions/messages.py +1004 -0
  33. vtlengine/Interpreter/__init__.py +2048 -0
  34. vtlengine/Model/__init__.py +501 -0
  35. vtlengine/Operators/Aggregation.py +357 -0
  36. vtlengine/Operators/Analytic.py +455 -0
  37. vtlengine/Operators/Assignment.py +23 -0
  38. vtlengine/Operators/Boolean.py +106 -0
  39. vtlengine/Operators/CastOperator.py +451 -0
  40. vtlengine/Operators/Clause.py +366 -0
  41. vtlengine/Operators/Comparison.py +488 -0
  42. vtlengine/Operators/Conditional.py +495 -0
  43. vtlengine/Operators/General.py +191 -0
  44. vtlengine/Operators/HROperators.py +254 -0
  45. vtlengine/Operators/Join.py +447 -0
  46. vtlengine/Operators/Numeric.py +422 -0
  47. vtlengine/Operators/RoleSetter.py +77 -0
  48. vtlengine/Operators/Set.py +176 -0
  49. vtlengine/Operators/String.py +578 -0
  50. vtlengine/Operators/Time.py +1144 -0
  51. vtlengine/Operators/Validation.py +275 -0
  52. vtlengine/Operators/__init__.py +900 -0
  53. vtlengine/Utils/__Virtual_Assets.py +34 -0
  54. vtlengine/Utils/__init__.py +479 -0
  55. vtlengine/__extras_check.py +17 -0
  56. vtlengine/__init__.py +27 -0
  57. vtlengine/files/__init__.py +0 -0
  58. vtlengine/files/output/__init__.py +35 -0
  59. vtlengine/files/output/_time_period_representation.py +55 -0
  60. vtlengine/files/parser/__init__.py +240 -0
  61. vtlengine/files/parser/_rfc_dialect.py +22 -0
  62. vtlengine/py.typed +0 -0
  63. vtlengine-1.4.0rc2.dist-info/METADATA +89 -0
  64. vtlengine-1.4.0rc2.dist-info/RECORD +66 -0
  65. vtlengine-1.4.0rc2.dist-info/WHEEL +4 -0
  66. vtlengine-1.4.0rc2.dist-info/licenses/LICENSE.md +661 -0
@@ -0,0 +1,275 @@
1
+ from copy import copy
2
+ from typing import Any, Dict, Optional, Union
3
+
4
+ import pandas as pd
5
+
6
+ from vtlengine.AST.Grammar.tokens import CHECK, CHECK_HIERARCHY
7
+ from vtlengine.DataTypes import (
8
+ Boolean,
9
+ Integer,
10
+ Number,
11
+ String,
12
+ check_unary_implicit_promotion,
13
+ )
14
+ from vtlengine.Exceptions import SemanticError
15
+ from vtlengine.Model import Component, Dataset, Role
16
+ from vtlengine.Operators import Operator
17
+ from vtlengine.Utils.__Virtual_Assets import VirtualCounter
18
+
19
+
20
# noinspection PyTypeChecker
class Check(Operator):
    """Implements the VTL ``check`` operator.

    Takes a single-Boolean-measure validation dataset (and optionally an
    imbalance dataset with one numeric measure) and produces a result dataset
    with ``imbalance``, ``errorcode`` and ``errorlevel`` measures appended.
    """

    op = CHECK

    @classmethod
    def validate(
        cls,
        validation_element: Dataset,
        imbalance_element: Optional[Dataset],
        error_code: Optional[str],
        error_level: Optional[Union[int, str]],
        invalid: bool,
    ) -> Dataset:
        """Build the component structure of the check result (no data).

        Raises:
            SemanticError: if the validation operand does not have exactly one
                Boolean measure, or the imbalance operand (when given) does
                not have exactly one Integer/Number measure.
            Exception: if validation and imbalance identifiers differ.
        """
        dataset_name = VirtualCounter._new_ds_name()
        # The validation operand must carry exactly one Boolean measure.
        if len(validation_element.get_measures()) != 1:
            raise SemanticError("1-1-10-1", op=cls.op, op_type="validation", me_type="Boolean")
        measure = validation_element.get_measures()[0]
        if measure.data_type != Boolean:
            raise SemanticError("1-1-10-1", op=cls.op, op_type="validation", me_type="Boolean")

        # errorlevel is typed Integer when the provided level is an int (or
        # absent); any other value (str) yields String.  The original code had
        # duplicate elif/else branches both assigning String.
        if error_level is None or isinstance(error_level, int):
            error_level_type = Integer
        else:
            error_level_type = String  # type: ignore[assignment]

        imbalance_measure = None
        if imbalance_element is not None:
            # Both operands must be keyed by exactly the same identifiers.
            operand_identifiers = validation_element.get_identifiers_names()
            imbalance_identifiers = imbalance_element.get_identifiers_names()
            if operand_identifiers != imbalance_identifiers:
                raise Exception(
                    "The validation and imbalance operands must have the same identifiers"
                )
            if len(imbalance_element.get_measures()) != 1:
                raise SemanticError("1-1-10-1", op=cls.op, op_type="imbalance", me_type="Numeric")

            imbalance_measure = imbalance_element.get_measures()[0]
            if imbalance_measure.data_type not in (Number, Integer):
                raise SemanticError("1-1-10-1", op=cls.op, op_type="imbalance", me_type="Numeric")

        # Result keeps the identifiers and measures of the validation operand
        # (attributes are dropped) and appends the three check measures.
        result_components = {
            comp.name: comp
            for comp in validation_element.components.values()
            if comp.role in [Role.IDENTIFIER, Role.MEASURE]
        }
        if imbalance_measure is None:
            result_components["imbalance"] = Component(
                name="imbalance", data_type=Number, role=Role.MEASURE, nullable=True
            )
        else:
            # Reuse the imbalance operand's measure definition, renamed.
            result_components["imbalance"] = copy(imbalance_measure)
            result_components["imbalance"].name = "imbalance"

        result_components["errorcode"] = Component(
            name="errorcode", data_type=String, role=Role.MEASURE, nullable=True
        )

        result_components["errorlevel"] = Component(
            name="errorlevel",
            data_type=error_level_type,  # type: ignore[arg-type]
            role=Role.MEASURE,
            nullable=True,
        )

        return Dataset(name=dataset_name, components=result_components, data=None)

    @classmethod
    def evaluate(
        cls,
        validation_element: Dataset,
        imbalance_element: Optional[Dataset],
        error_code: Optional[str],
        error_level: Optional[Union[int, str]],
        invalid: bool,
    ) -> Dataset:
        """Compute the check result data.

        When ``invalid`` is True only the failing (False) rows are kept and
        indexes are reset; otherwise all rows are returned.
        """
        result = cls.validate(
            validation_element, imbalance_element, error_code, error_level, invalid
        )
        if validation_element.data is None:
            validation_element.data = pd.DataFrame()
        columns_to_keep = (
            validation_element.get_identifiers_names() + validation_element.get_measures_names()
        )
        result.data = validation_element.data.loc[:, columns_to_keep]
        if imbalance_element is not None and imbalance_element.data is not None:
            imbalance_measure_name = imbalance_element.get_measures_names()[0]
            result.data["imbalance"] = imbalance_element.data[imbalance_measure_name]
        else:
            result.data["imbalance"] = None

        result.data["errorcode"] = error_code
        result.data["errorlevel"] = error_level
        if invalid:
            # TODO: Is this always bool_var?? In any case this does the trick for more use cases
            validation_measure_name = validation_element.get_measures_names()[0]
            # `== False` (not `~` / `is False`) deliberately excludes NaN rows.
            result.data = result.data[result.data[validation_measure_name] == False]  # noqa: E712
            result.data.reset_index(drop=True, inplace=True)
        return result
121
+
122
+
123
# noinspection PyTypeChecker
class Validation(Operator):
    """Base class for VTL datapoint validation (``check_datapoint``).

    Builds a result dataset keyed by the operand's identifiers plus
    ``ruleid``, carrying ``errorcode``/``errorlevel`` measures and, depending
    on ``output`` ('invalid' | 'all' | 'all_measures'), the original measures
    and/or a ``bool_var`` measure.
    """

    @classmethod
    def _generate_result_data(cls, rule_info: Dict[str, Any]) -> pd.DataFrame:
        """Concatenate the per-rule output frames into one DataFrame.

        Each row is tagged with its rule id; error code/level are mapped only
        onto failing rows — ``map({False: ...})`` intentionally leaves
        True/NaN rows as NaN, since only failing datapoints carry errors.
        """
        frames = []
        for rule_name, rule_data in rule_info.items():
            frame = rule_data["output"]
            frame["ruleid"] = rule_name
            frame["errorcode"] = frame["bool_var"].map({False: rule_data["errorcode"]})
            frame["errorlevel"] = frame["bool_var"].map({False: rule_data["errorlevel"]})
            frames.append(frame)

        if len(frames) == 1:
            return frames[0]
        return pd.concat(frames, ignore_index=True, copy=False)

    @classmethod
    def validate(cls, dataset_element: Dataset, rule_info: Dict[str, Any], output: str) -> Dataset:
        """Build the component structure of the validation result (no data).

        ``output`` selects which measures appear in the result:
        'invalid' keeps the original measures, 'all' keeps only ``bool_var``,
        'all_measures' keeps both.
        """
        error_levels = [
            rule_data.get("errorlevel")
            for rule_data in rule_info.values()
            if "errorlevel" in rule_data
        ]
        non_null_levels = [el for el in error_levels if el is not None]

        # errorlevel is Number when every provided level is an int (Number
        # rather than Integer, presumably so the column can hold NaN for
        # passing rows — TODO confirm); anything else yields String.  The
        # original code had duplicate elif/else branches both assigning String.
        if len(non_null_levels) == 0 or all(isinstance(el, int) for el in non_null_levels):
            error_level_type = Number
        else:
            error_level_type = String  # type: ignore[assignment]

        dataset_name = VirtualCounter._new_ds_name()
        result_components = {comp.name: comp for comp in dataset_element.get_identifiers()}
        result_components["ruleid"] = Component(
            name="ruleid", data_type=String, role=Role.IDENTIFIER, nullable=False
        )
        if output == "invalid":
            result_components = {
                **result_components,
                **{comp.name: copy(comp) for comp in dataset_element.get_measures()},
            }
        elif output == "all":
            result_components["bool_var"] = Component(
                name="bool_var", data_type=Boolean, role=Role.MEASURE, nullable=True
            )
        else:  # output == 'all_measures'
            result_components = {
                **result_components,
                **{comp.name: copy(comp) for comp in dataset_element.get_measures()},
                "bool_var": Component(
                    name="bool_var", data_type=Boolean, role=Role.MEASURE, nullable=True
                ),
            }
        result_components["errorcode"] = Component(
            name="errorcode", data_type=String, role=Role.MEASURE, nullable=True
        )
        result_components["errorlevel"] = Component(
            name="errorlevel",
            data_type=error_level_type,  # type: ignore[arg-type]
            role=Role.MEASURE,
            nullable=True,
        )

        return Dataset(name=dataset_name, components=result_components, data=None)

    @classmethod
    def evaluate(cls, dataset_element: Dataset, rule_info: Dict[str, Any], output: str) -> Dataset:
        """Compute the validation result data according to ``output``."""
        result = cls.validate(dataset_element, rule_info, output)
        result.data = cls._generate_result_data(rule_info)

        # Drop rows with null identifiers, then deduplicate datapoints per rule.
        result.data = result.data.dropna(subset=result.get_identifiers_names(), how="any")
        result.data = result.data.drop_duplicates(
            subset=result.get_identifiers_names() + ["ruleid"]
        ).reset_index(drop=True)
        validation_measures = ["bool_var", "errorcode", "errorlevel"]
        # Only for check hierarchy
        if "imbalance" in result.components:
            validation_measures.append("imbalance")
        if output == "invalid":
            # `== False` (not `~` / `is False`) deliberately excludes NaN rows.
            result.data = result.data[result.data["bool_var"] == False]  # noqa: E712
            result.data = result.data.drop(columns=["bool_var"])
            result.data.reset_index(drop=True, inplace=True)
        elif output == "all":
            result.data = result.data[result.get_identifiers_names() + validation_measures]
        else:  # output == 'all_measures'
            result.data = result.data[
                result.get_identifiers_names()
                + dataset_element.get_measures_names()
                + validation_measures
            ]

        # Reorder the columns to match the declared component order.
        result.data = result.data[result.get_components_names()]
        return result
218
+
219
+
220
class Check_Datapoint(Validation):
    """VTL ``check_datapoint`` operator; inherits all behavior from Validation."""
222
+
223
+
224
class Check_Hierarchy(Validation):
    """VTL ``check_hierarchy`` operator."""

    op = CHECK_HIERARCHY

    @classmethod
    def _generate_result_data(cls, rule_info: Dict[str, Any]) -> pd.DataFrame:
        """Concatenate the per-rule output frames into one DataFrame.

        Unlike datapoint validation, the error code/level are assigned to
        every row of a rule, not only to failing ones.
        """
        # Collect frames and concatenate once, instead of calling pd.concat
        # inside the loop (which copies the accumulated frame on every
        # iteration and is quadratic in the number of rules).
        frames = []
        for rule_name, rule_data in rule_info.items():
            frame = rule_data["output"]
            frame["ruleid"] = rule_name
            frame["errorcode"] = rule_data["errorcode"]
            frame["errorlevel"] = rule_data["errorlevel"]
            frames.append(frame)
        if not frames:
            # No rules: return an empty frame (the original `if df is None`
            # guard was dead code — df was always a DataFrame).
            return pd.DataFrame()
        return pd.concat(frames, ignore_index=True)

    @classmethod
    def validate(cls, dataset_element: Dataset, rule_info: Dict[str, Any], output: str) -> Dataset:
        """Build the result structure; adds the ``imbalance`` measure on top
        of the components produced by :meth:`Validation.validate`."""
        result = super().validate(dataset_element, rule_info, output)
        result.components["imbalance"] = Component(
            name="imbalance", data_type=Number, role=Role.MEASURE, nullable=True
        )
        return result

    @staticmethod
    def validate_hr_dataset(dataset: Dataset, component_name: str) -> None:
        """Validate the check_hierarchy operand and strip its attributes.

        Raises:
            SemanticError: if the dataset does not have exactly one measure
                promotable to Number, if ``component_name`` is not a component
                of the dataset, or if it is not an identifier.

        Note: mutates ``dataset`` in place by deleting all its attributes.
        """
        if len(dataset.get_measures()) != 1:
            raise SemanticError(
                "1-1-10-1", op=Check_Hierarchy.op, op_type="hierarchy", me_type="Number"
            )
        measure = dataset.get_measures()[0]
        if not check_unary_implicit_promotion(measure.data_type, Number):
            raise SemanticError(
                "1-1-10-1", op=Check_Hierarchy.op, op_type="hierarchy", me_type="Number"
            )
        if component_name not in dataset.components:
            raise SemanticError(
                "1-1-1-10",
                op=Check_Hierarchy.op,
                comp_name=component_name,
                dataset_name=dataset.name,
            )
        if dataset.components[component_name].role != Role.IDENTIFIER:
            raise SemanticError(
                "1-2-7",
                name=component_name,
                role=dataset.components[component_name].role.value,
            )
        # Remove attributes from dataset. Iterate over a snapshot so that
        # deleting components cannot invalidate the sequence being iterated
        # (also drops the redundant `len(...) > 0` guard).
        for attribute in list(dataset.get_attributes()):
            dataset.delete_component(attribute.name)