vtlengine 1.4.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. vtlengine/API/_InternalApi.py +791 -0
  2. vtlengine/API/__init__.py +612 -0
  3. vtlengine/API/data/schema/external_routines_schema.json +34 -0
  4. vtlengine/API/data/schema/json_schema_2.1.json +116 -0
  5. vtlengine/API/data/schema/value_domain_schema.json +97 -0
  6. vtlengine/AST/ASTComment.py +57 -0
  7. vtlengine/AST/ASTConstructor.py +598 -0
  8. vtlengine/AST/ASTConstructorModules/Expr.py +1928 -0
  9. vtlengine/AST/ASTConstructorModules/ExprComponents.py +995 -0
  10. vtlengine/AST/ASTConstructorModules/Terminals.py +790 -0
  11. vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
  12. vtlengine/AST/ASTDataExchange.py +10 -0
  13. vtlengine/AST/ASTEncoders.py +32 -0
  14. vtlengine/AST/ASTString.py +675 -0
  15. vtlengine/AST/ASTTemplate.py +558 -0
  16. vtlengine/AST/ASTVisitor.py +25 -0
  17. vtlengine/AST/DAG/__init__.py +479 -0
  18. vtlengine/AST/DAG/_words.py +10 -0
  19. vtlengine/AST/Grammar/Vtl.g4 +705 -0
  20. vtlengine/AST/Grammar/VtlTokens.g4 +409 -0
  21. vtlengine/AST/Grammar/__init__.py +0 -0
  22. vtlengine/AST/Grammar/lexer.py +2139 -0
  23. vtlengine/AST/Grammar/parser.py +16597 -0
  24. vtlengine/AST/Grammar/tokens.py +169 -0
  25. vtlengine/AST/VtlVisitor.py +824 -0
  26. vtlengine/AST/__init__.py +674 -0
  27. vtlengine/DataTypes/TimeHandling.py +562 -0
  28. vtlengine/DataTypes/__init__.py +863 -0
  29. vtlengine/DataTypes/_time_checking.py +135 -0
  30. vtlengine/Exceptions/__exception_file_generator.py +96 -0
  31. vtlengine/Exceptions/__init__.py +159 -0
  32. vtlengine/Exceptions/messages.py +1004 -0
  33. vtlengine/Interpreter/__init__.py +2048 -0
  34. vtlengine/Model/__init__.py +501 -0
  35. vtlengine/Operators/Aggregation.py +357 -0
  36. vtlengine/Operators/Analytic.py +455 -0
  37. vtlengine/Operators/Assignment.py +23 -0
  38. vtlengine/Operators/Boolean.py +106 -0
  39. vtlengine/Operators/CastOperator.py +451 -0
  40. vtlengine/Operators/Clause.py +366 -0
  41. vtlengine/Operators/Comparison.py +488 -0
  42. vtlengine/Operators/Conditional.py +495 -0
  43. vtlengine/Operators/General.py +191 -0
  44. vtlengine/Operators/HROperators.py +254 -0
  45. vtlengine/Operators/Join.py +447 -0
  46. vtlengine/Operators/Numeric.py +422 -0
  47. vtlengine/Operators/RoleSetter.py +77 -0
  48. vtlengine/Operators/Set.py +176 -0
  49. vtlengine/Operators/String.py +578 -0
  50. vtlengine/Operators/Time.py +1144 -0
  51. vtlengine/Operators/Validation.py +275 -0
  52. vtlengine/Operators/__init__.py +900 -0
  53. vtlengine/Utils/__Virtual_Assets.py +34 -0
  54. vtlengine/Utils/__init__.py +479 -0
  55. vtlengine/__extras_check.py +17 -0
  56. vtlengine/__init__.py +27 -0
  57. vtlengine/files/__init__.py +0 -0
  58. vtlengine/files/output/__init__.py +35 -0
  59. vtlengine/files/output/_time_period_representation.py +55 -0
  60. vtlengine/files/parser/__init__.py +240 -0
  61. vtlengine/files/parser/_rfc_dialect.py +22 -0
  62. vtlengine/py.typed +0 -0
  63. vtlengine-1.4.0rc2.dist-info/METADATA +89 -0
  64. vtlengine-1.4.0rc2.dist-info/RECORD +66 -0
  65. vtlengine-1.4.0rc2.dist-info/WHEEL +4 -0
  66. vtlengine-1.4.0rc2.dist-info/licenses/LICENSE.md +661 -0
@@ -0,0 +1,366 @@
1
+ from copy import copy
2
+ from typing import List, Type, Union
3
+
4
+ import pandas as pd
5
+
6
+ from vtlengine.AST import RenameNode
7
+ from vtlengine.AST.Grammar.tokens import AGGREGATE, CALC, DROP, KEEP, RENAME, SUBSPACE
8
+ from vtlengine.DataTypes import (
9
+ Boolean,
10
+ ScalarType,
11
+ String,
12
+ check_unary_implicit_promotion,
13
+ unary_implicit_promotion,
14
+ )
15
+ from vtlengine.Exceptions import SemanticError
16
+ from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
17
+ from vtlengine.Operators import Operator
18
+ from vtlengine.Utils.__Virtual_Assets import VirtualCounter
19
+
20
+
21
class Calc(Operator):
    """Implements the VTL ``calc`` clause: adds computed components to a
    dataset, overriding same-named measures/attributes when present."""

    op = CALC

    @classmethod
    def validate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
        """Build the result schema for a calc clause.

        Copies the input components into a fresh virtual dataset, then adds
        one component per operand. A Scalar operand becomes a nullable
        measure; a DataComponent keeps its own role and nullability.

        Raises:
            SemanticError: "1-1-6-13" if an operand would override an identifier.
        """
        components = {name: copy(comp) for name, comp in dataset.components.items()}
        result = Dataset(name=VirtualCounter._new_ds_name(), components=components, data=None)

        for calc_op in operands:
            existing = result.components.get(calc_op.name)
            if existing is not None:
                if existing.role == Role.IDENTIFIER:
                    raise SemanticError("1-1-6-13", op=cls.op, comp_name=calc_op.name)
                # A same-named measure/attribute is replaced by the calc result.
                # TODO: Check this for version 2.1
                result.delete_component(calc_op.name)

            is_scalar = isinstance(calc_op, Scalar)
            result.add_component(
                Component(
                    name=calc_op.name,
                    data_type=calc_op.data_type,
                    role=Role.MEASURE if is_scalar else calc_op.role,
                    nullable=True if is_scalar else calc_op.nullable,
                )
            )
        return result

    @classmethod
    def evaluate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
        """Validate, then materialize each operand as a column of the result.

        Scalar operands broadcast their value down the column; DataComponent
        operands contribute their computed data directly.
        """
        result = cls.validate(operands, dataset)
        result.data = pd.DataFrame() if dataset.data is None else dataset.data.copy()
        for calc_op in operands:
            column = calc_op.value if isinstance(calc_op, Scalar) else calc_op.data
            result.data[calc_op.name] = column
        return result
68
+
69
+
70
class Aggregate(Operator):
    """Implements the VTL ``aggr`` clause: adds aggregated components to a
    dataset's schema and data."""

    op = AGGREGATE

    @classmethod
    def validate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
        """Build the result schema for an aggregate clause.

        Raises:
            SemanticError: "1-1-6-13" if an operand collides with (or is) an
                identifier of the input dataset.
        """
        dataset_name = VirtualCounter._new_ds_name()
        # Copy the components dict so the input dataset is never mutated.
        # The previous implementation passed dataset.components through and
        # then called dataset.delete_component(...), which (with a shared
        # dict) corrupted the caller's dataset; Calc.validate already copies.
        result_components = {name: copy(comp) for name, comp in dataset.components.items()}
        result_dataset = Dataset(name=dataset_name, components=result_components, data=None)

        for operand in operands:
            if operand.name in dataset.get_identifiers_names() or (
                isinstance(operand, DataComponent) and operand.role == Role.IDENTIFIER
            ):
                raise SemanticError("1-1-6-13", op=cls.op, comp_name=operand.name)

            elif operand.name in result_dataset.components:
                # Override component with same name (delete from the result
                # copy, not from the input dataset).
                result_dataset.delete_component(operand.name)

            if isinstance(operand, Scalar):
                # Scalar aggregation results become nullable measures.
                result_dataset.add_component(
                    Component(
                        name=operand.name,
                        data_type=operand.data_type,
                        role=Role.MEASURE,
                        nullable=True,
                    )
                )
            else:
                result_dataset.add_component(
                    Component(
                        name=operand.name,
                        data_type=operand.data_type,
                        role=operand.role,
                        nullable=operand.nullable,
                    )
                )
        return result_dataset

    @classmethod
    def evaluate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
        """Validate, then attach each aggregated operand as a result column.

        An empty DataComponent result yields a null column so the schema
        stays consistent even when the aggregation produced no rows.
        """
        result_dataset = cls.validate(operands, dataset)
        result_dataset.data = copy(dataset.data) if dataset.data is not None else pd.DataFrame()
        for operand in operands:
            if isinstance(operand, Scalar):
                result_dataset.data[operand.name] = operand.value
            else:
                if operand.data is not None and len(operand.data) > 0:
                    result_dataset.data[operand.name] = operand.data
                else:
                    result_dataset.data[operand.name] = None
        return result_dataset
121
+
122
+
123
class Filter(Operator):
    """Implements the VTL ``filter`` clause: keeps only the rows for which a
    boolean condition component evaluates to True."""

    @classmethod
    def validate(cls, condition: DataComponent, dataset: Dataset) -> Dataset:
        """Check the condition is Boolean-typed and return an empty result
        dataset sharing the input's components.

        Raises:
            ValueError: if the condition component is not Boolean.
        """
        if condition.data_type != Boolean:
            raise ValueError(f"Filter condition must be of type {Boolean}")
        return Dataset(
            name=VirtualCounter._new_ds_name(), components=dataset.components, data=None
        )

    @classmethod
    def evaluate(cls, condition: DataComponent, dataset: Dataset) -> Dataset:
        """Validate, then select the rows whose condition value is True."""
        result = cls.validate(condition, dataset)
        if dataset.data is None:
            result.data = pd.DataFrame()
        elif condition.data is not None and len(condition.data) > 0:
            # `== True` (not truthiness) so that null/NaN condition values
            # are treated as False and the row is dropped.
            keep_idx = condition.data[condition.data == True].index  # noqa: E712
            result.data = dataset.data.iloc[keep_idx].reset_index(drop=True)
        else:
            result.data = dataset.data.copy()
        return result
139
+
140
+
141
class Keep(Operator):
    """Implements the VTL ``keep`` clause: restricts a dataset to its
    identifiers plus an explicit list of components."""

    op = KEEP

    @classmethod
    def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
        """Check every kept name exists and is not an identifier, then build
        the reduced schema (identifiers are always retained).

        Raises:
            SemanticError: "1-1-1-10" for an unknown component,
                "1-1-6-2" when a kept component is an identifier.
        """
        new_name = VirtualCounter._new_ds_name()
        for comp_name in operands:
            if comp_name not in dataset.get_components_names():
                raise SemanticError(
                    "1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=new_name
                )
            if dataset.get_component(comp_name).role == Role.IDENTIFIER:
                raise SemanticError("1-1-6-2", op=cls.op, name=comp_name, dataset=new_name)
        kept = {
            name: comp
            for name, comp in dataset.components.items()
            if comp.role == Role.IDENTIFIER or comp.name in operands
        }
        return Dataset(name=new_name, components=kept, data=None)

    @classmethod
    def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
        """Validate, then project the data onto identifiers + kept columns.

        Raises:
            ValueError: on an empty operand list, or when no dataset operand
                is available.
        """
        if not operands:
            raise ValueError("Keep clause requires at least one operand")
        # NOTE(review): operands are component-name strings here, so the
        # isinstance(..., Dataset) sum is always 0 and this raises whenever
        # dataset is None — presumably a guard inherited from a generic
        # operator template; confirm before simplifying.
        if dataset is None and sum(isinstance(operand, Dataset) for operand in operands) != 1:
            raise ValueError("Keep clause requires at most one dataset operand")
        result = cls.validate(operands, dataset)
        if dataset.data is not None:
            result.data = dataset.data[dataset.get_identifiers_names() + operands]
        return result
171
+
172
+
173
class Drop(Operator):
    """Implements the VTL ``drop`` clause: removes listed non-identifier
    components from a dataset."""

    op = DROP

    @classmethod
    def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
        """Check every dropped name exists, is not an identifier, and that at
        least one component remains, then build the reduced schema.

        Raises:
            SemanticError: "1-1-1-10" for an unknown component,
                "1-1-6-2" when dropping an identifier,
                "1-1-6-12" when every component would be dropped.
        """
        dataset_name = VirtualCounter._new_ds_name()
        for operand in operands:
            if operand not in dataset.components:
                # op=cls.op added for consistency: Keep, Rename and Sub all
                # pass the operator token with error code "1-1-1-10".
                raise SemanticError(
                    "1-1-1-10", op=cls.op, comp_name=operand, dataset_name=dataset_name
                )
            if dataset.get_component(operand).role == Role.IDENTIFIER:
                raise SemanticError("1-1-6-2", op=cls.op, name=operand, dataset=dataset_name)
        if len(dataset.components) == len(operands):
            # Dropping everything would leave a dataset with no components.
            raise SemanticError("1-1-6-12", op=cls.op)
        result_components = {
            name: comp for name, comp in dataset.components.items() if comp.name not in operands
        }
        return Dataset(name=dataset_name, components=result_components, data=None)

    @classmethod
    def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
        """Validate, then drop the operand columns from the data."""
        result_dataset = cls.validate(operands, dataset)
        if dataset.data is not None:
            # `columns=` already selects the axis; the redundant axis=1
            # argument was removed.
            result_dataset.data = dataset.data.drop(columns=operands)
        return result_dataset
197
+
198
+
199
class Rename(Operator):
    """Implements the VTL ``rename`` clause: renames dataset components,
    preserving their type, role and nullability."""

    op = RENAME

    @classmethod
    def validate(cls, operands: List[RenameNode], dataset: Dataset) -> Dataset:
        """Check the rename mapping is well-formed and build the new schema.

        Raises:
            SemanticError: "1-1-6-9" on duplicate source names,
                "1-2-1" on duplicate target names,
                "1-1-1-10" for an unknown source component,
                "1-1-6-8" when a target name already exists.
        """
        new_ds_name = VirtualCounter._new_ds_name()

        old_names = [node.old_name for node in operands]
        if len(set(old_names)) != len(old_names):
            dup = {n for n in old_names if old_names.count(n) > 1}
            raise SemanticError("1-1-6-9", op=cls.op, from_components=dup)

        new_names = [node.new_name for node in operands]
        if len(set(new_names)) != len(new_names):
            dup = {n for n in new_names if new_names.count(n) > 1}
            raise SemanticError("1-2-1", alias=dup)

        for node in operands:
            if node.old_name not in dataset.components:
                raise SemanticError(
                    "1-1-1-10",
                    op=cls.op,
                    comp_name=node.old_name,
                    dataset_name=new_ds_name,
                )
            if node.new_name in dataset.components:
                raise SemanticError(
                    "1-1-6-8",
                    op=cls.op,
                    comp_name=node.new_name,
                    dataset_name=new_ds_name,
                )

        # Rebuild the component map: a fresh Component per rename so the
        # input dataset's components are never mutated.
        components = {comp.name: comp for comp in dataset.components.values()}
        for node in operands:
            source = components[node.old_name]
            components[node.new_name] = Component(
                name=node.new_name,
                data_type=source.data_type,
                role=source.role,
                nullable=source.nullable,
            )
            del components[node.old_name]
        return Dataset(name=new_ds_name, components=components, data=None)

    @classmethod
    def evaluate(cls, operands: List[RenameNode], dataset: Dataset) -> Dataset:
        """Validate, then apply the column renames to the data."""
        result = cls.validate(operands, dataset)
        if dataset.data is not None:
            mapping = {node.old_name: node.new_name for node in operands}
            result.data = dataset.data.rename(columns=mapping)
        return result
250
+
251
+
252
class Pivot(Operator):
    """VTL ``pivot`` clause placeholder.

    Pivoting is not supported by this engine yet; both entry points raise
    ``NotImplementedError``.
    """

    @classmethod
    def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
        # Not implemented: pivot is outside the supported clause set.
        raise NotImplementedError

    @classmethod
    def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
        # Not implemented: pivot is outside the supported clause set.
        raise NotImplementedError
260
+
261
+
262
class Unpivot(Operator):
    """Implements the VTL ``unpivot`` clause: melts the measure columns into
    (measure-name, value) rows, producing one new identifier and one new
    measure."""

    @classmethod
    def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
        """Build the unpivoted schema.

        ``operands`` must be exactly ``[identifier_name, measure_name]``. All
        input measures must share (or implicitly promote to) a single data
        type, which becomes the type of the new measure.

        Raises:
            ValueError: when operands is not a pair, or measure types clash.
            SemanticError: "1-2-10" if the dataset has no identifiers,
                "1-1-6-2" if the new identifier name already exists.
        """
        dataset_name = VirtualCounter._new_ds_name()
        if len(operands) != 2:
            raise ValueError("Unpivot clause requires two operands")
        identifier, measure = operands

        if len(dataset.get_identifiers()) < 1:
            raise SemanticError("1-2-10", op=cls.op)
        if identifier in dataset.components:
            raise SemanticError("1-1-6-2", op=cls.op, name=identifier, dataset=dataset_name)

        result_components = {comp.name: comp for comp in dataset.get_identifiers()}
        result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
        # noinspection PyTypeChecker
        result_dataset.add_component(
            Component(name=identifier, data_type=String, role=Role.IDENTIFIER, nullable=False)
        )
        base_type = None
        final_type: Type[ScalarType] = String
        for comp in dataset.get_measures():
            if base_type is None:
                # BUG FIX: with exactly one measure the promotion branch
                # below never ran, leaving final_type at its String default
                # regardless of the measure's actual type. Seed it from the
                # first measure instead.
                base_type = comp.data_type
                final_type = base_type
            else:
                if check_unary_implicit_promotion(base_type, comp.data_type) is None:
                    raise ValueError("All measures must have the same data type on unpivot clause")
                final_type = unary_implicit_promotion(base_type, comp.data_type)

        result_dataset.add_component(
            Component(name=measure, data_type=final_type, role=Role.MEASURE, nullable=True)
        )
        return result_dataset

    @classmethod
    def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
        """Validate, then melt the measure columns into rows.

        Null measure values do not produce rows in the unpivoted result.
        """
        result_dataset = cls.validate(operands, dataset)
        if dataset.data is not None:
            result_dataset.data = dataset.data.melt(
                id_vars=dataset.get_identifiers_names(),
                value_vars=dataset.get_measures_names(),
                var_name=operands[0],
                value_name="NEW_COLUMN",
            )
            result_dataset.data.rename(columns={"NEW_COLUMN": operands[1]}, inplace=True)
            result_dataset.data = result_dataset.data.dropna().reset_index(drop=True)
        return result_dataset
309
+
310
+
311
class Sub(Operator):
    """Implements the VTL ``sub`` (subspace) clause: fixes identifier values
    and removes those identifiers from the result."""

    op = SUBSPACE

    @classmethod
    def validate(cls, operands: List[DataComponent], dataset: Dataset) -> Dataset:
        """Check every operand is an existing identifier component and build
        the reduced schema without those identifiers.

        Raises:
            SemanticError: "1-2-10" if the dataset has no identifiers,
                "1-1-1-10" for an unknown component,
                "1-1-6-5" for a scalar operand,
                "1-1-6-10" for a non-identifier component.
        """
        dataset_name = VirtualCounter._new_ds_name()
        if len(dataset.get_identifiers()) < 1:
            raise SemanticError("1-2-10", op=cls.op)
        for operand in operands:
            if operand.name not in dataset.components:
                raise SemanticError(
                    "1-1-1-10",
                    op=cls.op,
                    comp_name=operand.name,
                    dataset_name=dataset_name,
                )
            # BUG FIX: the Scalar check must come before touching
            # operand.role — a Scalar operand previously hit the role check
            # first, so the dedicated "1-1-6-5" error was unreachable.
            if isinstance(operand, Scalar):
                raise SemanticError("1-1-6-5", op=cls.op, name=operand.name)
            if operand.role != Role.IDENTIFIER:
                raise SemanticError(
                    "1-1-6-10",
                    op=cls.op,
                    operand=operand.name,
                    dataset_name=dataset_name,
                )

        result_components = {
            name: comp
            for name, comp in dataset.components.items()
            if comp.name not in [operand.name for operand in operands]
        }
        return Dataset(name=dataset_name, components=result_components, data=None)

    @classmethod
    def evaluate(cls, operands: List[DataComponent], dataset: Dataset) -> Dataset:
        """Validate, then keep only the rows matching every subspace condition
        and drop the fixed identifier columns."""
        result_dataset = cls.validate(operands, dataset)
        operand_names = [operand.name for operand in operands]
        if dataset.data is None:
            # BUG FIX: the original built a bare empty DataFrame and then
            # dropped the operand columns from it, raising KeyError; return
            # an empty frame with the result schema's columns instead.
            result_dataset.data = pd.DataFrame(columns=list(result_dataset.components))
            return result_dataset
        result_dataset.data = copy(dataset.data)
        if len(dataset.data) > 0:
            # Filter the DataFrame by intersecting the indexes where each
            # operand's boolean mask is True (`== True` so nulls are False).
            true_indexes = set()
            is_first = True
            for operand in operands:
                if operand.data is not None:
                    matches = set(operand.data[operand.data == True].index)  # noqa: E712
                    if is_first:
                        true_indexes = matches
                        is_first = False
                    else:
                        true_indexes.intersection_update(matches)
            result_dataset.data = result_dataset.data.iloc[list(true_indexes)]
        result_dataset.data = result_dataset.data.drop(columns=operand_names, axis=1)
        result_dataset.data = result_dataset.data.reset_index(drop=True)
        return result_dataset