vtlengine 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vtlengine might be problematic. Click here for more details.

Files changed (54)
  1. vtlengine/API/_InternalApi.py +153 -100
  2. vtlengine/API/__init__.py +109 -67
  3. vtlengine/AST/ASTConstructor.py +188 -98
  4. vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
  5. vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
  6. vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
  7. vtlengine/AST/ASTEncoders.py +1 -1
  8. vtlengine/AST/ASTTemplate.py +8 -9
  9. vtlengine/AST/ASTVisitor.py +8 -12
  10. vtlengine/AST/DAG/__init__.py +43 -35
  11. vtlengine/AST/DAG/_words.py +4 -4
  12. vtlengine/AST/Grammar/lexer.py +732 -142
  13. vtlengine/AST/Grammar/parser.py +2188 -826
  14. vtlengine/AST/Grammar/tokens.py +128 -128
  15. vtlengine/AST/VtlVisitor.py +7 -4
  16. vtlengine/AST/__init__.py +22 -11
  17. vtlengine/DataTypes/NumericTypesHandling.py +5 -4
  18. vtlengine/DataTypes/TimeHandling.py +194 -301
  19. vtlengine/DataTypes/__init__.py +304 -218
  20. vtlengine/Exceptions/__init__.py +52 -27
  21. vtlengine/Exceptions/messages.py +134 -62
  22. vtlengine/Interpreter/__init__.py +781 -487
  23. vtlengine/Model/__init__.py +165 -121
  24. vtlengine/Operators/Aggregation.py +156 -95
  25. vtlengine/Operators/Analytic.py +115 -59
  26. vtlengine/Operators/Assignment.py +7 -4
  27. vtlengine/Operators/Boolean.py +27 -32
  28. vtlengine/Operators/CastOperator.py +177 -131
  29. vtlengine/Operators/Clause.py +137 -99
  30. vtlengine/Operators/Comparison.py +148 -117
  31. vtlengine/Operators/Conditional.py +149 -98
  32. vtlengine/Operators/General.py +68 -47
  33. vtlengine/Operators/HROperators.py +91 -72
  34. vtlengine/Operators/Join.py +217 -118
  35. vtlengine/Operators/Numeric.py +89 -44
  36. vtlengine/Operators/RoleSetter.py +16 -15
  37. vtlengine/Operators/Set.py +61 -36
  38. vtlengine/Operators/String.py +213 -139
  39. vtlengine/Operators/Time.py +334 -216
  40. vtlengine/Operators/Validation.py +117 -76
  41. vtlengine/Operators/__init__.py +340 -213
  42. vtlengine/Utils/__init__.py +195 -40
  43. vtlengine/__init__.py +1 -1
  44. vtlengine/files/output/__init__.py +15 -6
  45. vtlengine/files/output/_time_period_representation.py +10 -9
  46. vtlengine/files/parser/__init__.py +77 -52
  47. vtlengine/files/parser/_rfc_dialect.py +6 -5
  48. vtlengine/files/parser/_time_checking.py +46 -37
  49. vtlengine-1.0.1.dist-info/METADATA +236 -0
  50. vtlengine-1.0.1.dist-info/RECORD +58 -0
  51. {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
  52. vtlengine-1.0.dist-info/METADATA +0 -104
  53. vtlengine-1.0.dist-info/RECORD +0 -58
  54. {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
@@ -1,7 +1,15 @@
1
- from copy import copy
2
- from typing import List, Union
1
+ import pandas as pd
3
2
 
4
- from vtlengine.DataTypes import Boolean, String, check_unary_implicit_promotion, unary_implicit_promotion
3
+ from copy import copy
4
+ from typing import List, Union, Type
5
+
6
+ from vtlengine.DataTypes import (
7
+ Boolean,
8
+ String,
9
+ check_unary_implicit_promotion,
10
+ unary_implicit_promotion,
11
+ ScalarType,
12
+ )
5
13
  from vtlengine.Operators import Operator
6
14
 
7
15
  from vtlengine.AST import RenameNode
@@ -14,7 +22,7 @@ class Calc(Operator):
14
22
  op = CALC
15
23
 
16
24
  @classmethod
17
- def validate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset):
25
+ def validate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
18
26
 
19
27
  result_components = {name: copy(comp) for name, comp in dataset.components.items()}
20
28
  result_dataset = Dataset(name=dataset.name, components=result_components, data=None)
@@ -23,32 +31,35 @@ class Calc(Operator):
23
31
 
24
32
  if operand.name in result_dataset.components:
25
33
  if result_dataset.components[operand.name].role == Role.IDENTIFIER:
26
- raise SemanticError("1-1-6-13", op=cls.op,
27
- comp_name=operand.name)
34
+ raise SemanticError("1-1-6-13", op=cls.op, comp_name=operand.name)
28
35
  # Override component with same name
29
36
  # TODO: Check this for version 2.1
30
37
  result_dataset.delete_component(operand.name)
31
38
 
32
39
  if isinstance(operand, Scalar):
33
- result_dataset.add_component(Component(
34
- name=operand.name,
35
- data_type=operand.data_type,
36
- role=Role.MEASURE,
37
- nullable=True
38
- ))
40
+ result_dataset.add_component(
41
+ Component(
42
+ name=operand.name,
43
+ data_type=operand.data_type,
44
+ role=Role.MEASURE,
45
+ nullable=True,
46
+ )
47
+ )
39
48
  else:
40
- result_dataset.add_component(Component(
41
- name=operand.name,
42
- data_type=operand.data_type,
43
- role=operand.role,
44
- nullable=operand.nullable
45
- ))
49
+ result_dataset.add_component(
50
+ Component(
51
+ name=operand.name,
52
+ data_type=operand.data_type,
53
+ role=operand.role,
54
+ nullable=operand.nullable,
55
+ )
56
+ )
46
57
  return result_dataset
47
58
 
48
59
  @classmethod
49
- def evaluate(cls, operands: List[DataComponent], dataset: Dataset):
60
+ def evaluate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
50
61
  result_dataset = cls.validate(operands, dataset)
51
- result_dataset.data = dataset.data.copy()
62
+ result_dataset.data = dataset.data.copy() if dataset.data is not None else pd.DataFrame()
52
63
  for operand in operands:
53
64
  if isinstance(operand, Scalar):
54
65
  result_dataset.data[operand.name] = operand.value
@@ -61,12 +72,14 @@ class Aggregate(Operator):
61
72
  op = AGGREGATE
62
73
 
63
74
  @classmethod
64
- def validate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset):
75
+ def validate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
65
76
 
66
77
  result_dataset = Dataset(name=dataset.name, components=dataset.components, data=None)
67
78
 
68
79
  for operand in operands:
69
- if operand.name in dataset.get_identifiers_names() or operand.role == Role.IDENTIFIER:
80
+ if operand.name in dataset.get_identifiers_names() or (
81
+ isinstance(operand, DataComponent) and operand.role == Role.IDENTIFIER
82
+ ):
70
83
  raise SemanticError("1-1-6-13", op=cls.op, comp_name=operand.name)
71
84
 
72
85
  elif operand.name in dataset.components:
@@ -74,30 +87,34 @@ class Aggregate(Operator):
74
87
  dataset.delete_component(operand.name)
75
88
 
76
89
  if isinstance(operand, Scalar):
77
- result_dataset.add_component(Component(
78
- name=operand.name,
79
- data_type=operand.data_type,
80
- role=Role.MEASURE,
81
- nullable=True
82
- ))
90
+ result_dataset.add_component(
91
+ Component(
92
+ name=operand.name,
93
+ data_type=operand.data_type,
94
+ role=Role.MEASURE,
95
+ nullable=True,
96
+ )
97
+ )
83
98
  else:
84
- result_dataset.add_component(Component(
85
- name=operand.name,
86
- data_type=operand.data_type,
87
- role=operand.role,
88
- nullable=operand.nullable
89
- ))
99
+ result_dataset.add_component(
100
+ Component(
101
+ name=operand.name,
102
+ data_type=operand.data_type,
103
+ role=operand.role,
104
+ nullable=operand.nullable,
105
+ )
106
+ )
90
107
  return result_dataset
91
108
 
92
109
  @classmethod
93
- def evaluate(cls, operands: List[DataComponent], dataset: Dataset):
110
+ def evaluate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
94
111
  result_dataset = cls.validate(operands, dataset)
95
- result_dataset.data = dataset.data.copy()
112
+ result_dataset.data = copy(dataset.data) if dataset.data is not None else pd.DataFrame()
96
113
  for operand in operands:
97
114
  if isinstance(operand, Scalar):
98
115
  result_dataset.data[operand.name] = operand.value
99
116
  else:
100
- if len(operand.data) > 0:
117
+ if operand.data is not None and len(operand.data) > 0:
101
118
  result_dataset.data[operand.name] = operand.data
102
119
  else:
103
120
  result_dataset.data[operand.name] = None
@@ -107,16 +124,16 @@ class Aggregate(Operator):
107
124
  class Filter(Operator):
108
125
 
109
126
  @classmethod
110
- def validate(cls, condition: DataComponent, dataset: Dataset):
127
+ def validate(cls, condition: DataComponent, dataset: Dataset) -> Dataset:
111
128
  if condition.data_type != Boolean:
112
129
  raise ValueError(f"Filter condition must be of type {Boolean}")
113
130
  return Dataset(name=dataset.name, components=dataset.components, data=None)
114
131
 
115
132
  @classmethod
116
- def evaluate(cls, condition: DataComponent, dataset: Dataset):
133
+ def evaluate(cls, condition: DataComponent, dataset: Dataset) -> Dataset:
117
134
  result_dataset = cls.validate(condition, dataset)
118
- result_dataset.data = dataset.data.copy()
119
- if len(condition.data) > 0:
135
+ result_dataset.data = dataset.data.copy() if dataset.data is not None else pd.DataFrame()
136
+ if condition.data is not None and len(condition.data) > 0 and dataset.data is not None:
120
137
  true_indexes = condition.data[condition.data == True].index
121
138
  result_dataset.data = dataset.data.iloc[true_indexes].reset_index(drop=True)
122
139
  return result_dataset
@@ -126,27 +143,31 @@ class Keep(Operator):
126
143
  op = KEEP
127
144
 
128
145
  @classmethod
129
- def validate(cls, operands: List[str], dataset: Dataset):
146
+ def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
130
147
  for operand in operands:
131
148
  if operand not in dataset.get_components_names():
132
- raise SemanticError("1-1-1-10", op=cls.op, comp_name=operand,
133
- dataset_name=dataset.name)
149
+ raise SemanticError(
150
+ "1-1-1-10", op=cls.op, comp_name=operand, dataset_name=dataset.name
151
+ )
134
152
  if dataset.get_component(operand).role == Role.IDENTIFIER:
135
153
  raise SemanticError("1-1-6-2", op=cls.op, name=operand, dataset=dataset.name)
136
- result_components = {name: comp for name, comp in dataset.components.items()
137
- if comp.name in operands or comp.role == Role.IDENTIFIER}
138
-
154
+ result_components = {
155
+ name: comp
156
+ for name, comp in dataset.components.items()
157
+ if comp.name in operands or comp.role == Role.IDENTIFIER
158
+ }
139
159
  return Dataset(name=dataset.name, components=result_components, data=None)
140
160
 
141
161
  @classmethod
142
162
  def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
143
163
  if len(operands) == 0:
144
- raise ValueError('Keep clause requires at least one operand')
164
+ raise ValueError("Keep clause requires at least one operand")
145
165
  if dataset is None:
146
166
  if sum(isinstance(operand, Dataset) for operand in operands) != 1:
147
- raise ValueError('Keep clause requires at most one dataset operand')
167
+ raise ValueError("Keep clause requires at most one dataset operand")
148
168
  result_dataset = cls.validate(operands, dataset)
149
- result_dataset.data = dataset.data[dataset.get_identifiers_names() + operands]
169
+ if dataset.data is not None:
170
+ result_dataset.data = dataset.data[dataset.get_identifiers_names() + operands]
150
171
  return result_dataset
151
172
 
152
173
 
@@ -154,7 +175,7 @@ class Drop(Operator):
154
175
  op = DROP
155
176
 
156
177
  @classmethod
157
- def validate(cls, operands: List[str], dataset: Dataset):
178
+ def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
158
179
  for operand in operands:
159
180
  if operand not in dataset.components:
160
181
  raise SemanticError("1-1-1-10", comp_name=operand, dataset_name=dataset.name)
@@ -162,15 +183,16 @@ class Drop(Operator):
162
183
  raise SemanticError("1-1-6-2", op=cls.op, name=operand, dataset=dataset.name)
163
184
  if len(dataset.components) == len(operands):
164
185
  raise SemanticError("1-1-6-12", op=cls.op)
165
- result_components = {name: comp for name, comp in dataset.components.items()
166
- if comp.name not in operands}
167
-
186
+ result_components = {
187
+ name: comp for name, comp in dataset.components.items() if comp.name not in operands
188
+ }
168
189
  return Dataset(name=dataset.name, components=result_components, data=None)
169
190
 
170
191
  @classmethod
171
- def evaluate(cls, operands: List[str], dataset: Dataset):
192
+ def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
172
193
  result_dataset = cls.validate(operands, dataset)
173
- result_dataset.data = dataset.data.drop(columns=operands, axis=1)
194
+ if dataset.data is not None:
195
+ result_dataset.data = dataset.data.drop(columns=operands, axis=1)
174
196
  return result_dataset
175
197
 
176
198
 
@@ -178,26 +200,26 @@ class Rename(Operator):
178
200
  op = RENAME
179
201
 
180
202
  @classmethod
181
- def validate(cls, operands: List[RenameNode], dataset: Dataset):
203
+ def validate(cls, operands: List[RenameNode], dataset: Dataset) -> Dataset:
182
204
  from_names = [operand.old_name for operand in operands]
183
205
  if len(from_names) != len(set(from_names)):
184
- duplicates = set(
185
- [name for name in from_names if from_names.count(name) > 1])
206
+ duplicates = set([name for name in from_names if from_names.count(name) > 1])
186
207
  raise SemanticError("1-1-6-9", op=cls.op, from_components=duplicates)
187
208
 
188
209
  to_names = [operand.new_name for operand in operands]
189
- if len(to_names) != len(set(to_names)): # Si hay duplicados
190
- duplicates = set(
191
- [name for name in to_names if to_names.count(name) > 1])
210
+ if len(to_names) != len(set(to_names)): # If duplicates
211
+ duplicates = set([name for name in to_names if to_names.count(name) > 1])
192
212
  raise SemanticError("1-3-1", alias=duplicates)
193
213
 
194
214
  for operand in operands:
195
215
  if operand.old_name not in dataset.components.keys():
196
- raise SemanticError("1-1-1-10", op=cls.op, comp_name=operand.old_name,
197
- dataset_name=dataset.name)
216
+ raise SemanticError(
217
+ "1-1-1-10", op=cls.op, comp_name=operand.old_name, dataset_name=dataset.name
218
+ )
198
219
  if operand.new_name in dataset.components.keys():
199
- raise SemanticError("1-1-6-8", op=cls.op, comp_name=operand.new_name,
200
- dataset_name=dataset.name)
220
+ raise SemanticError(
221
+ "1-1-6-8", op=cls.op, comp_name=operand.new_name, dataset_name=dataset.name
222
+ )
201
223
 
202
224
  result_components = {comp.name: comp for comp in dataset.components.values()}
203
225
  for operand in operands:
@@ -205,35 +227,37 @@ class Rename(Operator):
205
227
  name=operand.new_name,
206
228
  data_type=result_components[operand.old_name].data_type,
207
229
  role=result_components[operand.old_name].role,
208
- nullable=result_components[operand.old_name].nullable
230
+ nullable=result_components[operand.old_name].nullable,
209
231
  )
210
232
  del result_components[operand.old_name]
211
233
 
212
234
  return Dataset(name=dataset.name, components=result_components, data=None)
213
235
 
214
236
  @classmethod
215
- def evaluate(cls, operands: List[RenameNode], dataset: Dataset):
237
+ def evaluate(cls, operands: List[RenameNode], dataset: Dataset) -> Dataset:
216
238
  result_dataset = cls.validate(operands, dataset)
217
- result_dataset.data = dataset.data.rename(columns={operand.old_name: operand.new_name
218
- for operand in operands})
239
+ if dataset.data is not None:
240
+ result_dataset.data = dataset.data.rename(
241
+ columns={operand.old_name: operand.new_name for operand in operands}
242
+ )
219
243
  return result_dataset
220
244
 
221
245
 
222
246
  class Pivot(Operator):
223
247
 
224
248
  @classmethod
225
- def validate(cls, operands: List[str], dataset: Dataset):
249
+ def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
226
250
  raise NotImplementedError
227
251
 
228
252
  @classmethod
229
- def evaluate(cls, operands: List[str], dataset: Dataset):
253
+ def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
230
254
  raise NotImplementedError
231
255
 
232
256
 
233
257
  class Unpivot(Operator):
234
258
 
235
259
  @classmethod
236
- def validate(cls, operands: List[str], dataset: Dataset):
260
+ def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
237
261
  if len(operands) != 2:
238
262
  raise ValueError("Unpivot clause requires two operands")
239
263
  identifier, measure = operands
@@ -246,10 +270,11 @@ class Unpivot(Operator):
246
270
  result_components = {comp.name: comp for comp in dataset.get_identifiers()}
247
271
  result_dataset = Dataset(name=dataset.name, components=result_components, data=None)
248
272
  # noinspection PyTypeChecker
249
- result_dataset.add_component(Component(name=identifier, data_type=String,
250
- role=Role.IDENTIFIER, nullable=False))
273
+ result_dataset.add_component(
274
+ Component(name=identifier, data_type=String, role=Role.IDENTIFIER, nullable=False)
275
+ )
251
276
  base_type = None
252
- final_type = String
277
+ final_type: Type[ScalarType] = String
253
278
  for comp in dataset.get_measures():
254
279
  if base_type is None:
255
280
  base_type = comp.data_type
@@ -258,18 +283,23 @@ class Unpivot(Operator):
258
283
  raise ValueError("All measures must have the same data type on unpivot clause")
259
284
  final_type = unary_implicit_promotion(base_type, comp.data_type)
260
285
 
261
- result_dataset.add_component(Component(name=measure, data_type=final_type,
262
- role=Role.MEASURE, nullable=True))
286
+ result_dataset.add_component(
287
+ Component(name=measure, data_type=final_type, role=Role.MEASURE, nullable=True)
288
+ )
263
289
  return result_dataset
264
290
 
265
291
  @classmethod
266
- def evaluate(cls, operands: List[str], dataset: Dataset):
292
+ def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
267
293
  result_dataset = cls.validate(operands, dataset)
268
- result_dataset.data = dataset.data.melt(id_vars=dataset.get_identifiers_names(),
269
- value_vars=dataset.get_measures_names(),
270
- var_name=operands[0], value_name="NEW_COLUMN")
271
- result_dataset.data.rename(columns={"NEW_COLUMN": operands[1]}, inplace=True)
272
- result_dataset.data = result_dataset.data.dropna().reset_index(drop=True)
294
+ if dataset.data is not None:
295
+ result_dataset.data = dataset.data.melt(
296
+ id_vars=dataset.get_identifiers_names(),
297
+ value_vars=dataset.get_measures_names(),
298
+ var_name=operands[0],
299
+ value_name="NEW_COLUMN",
300
+ )
301
+ result_dataset.data.rename(columns={"NEW_COLUMN": operands[1]}, inplace=True)
302
+ result_dataset.data = result_dataset.data.dropna().reset_index(drop=True)
273
303
  return result_dataset
274
304
 
275
305
 
@@ -277,39 +307,47 @@ class Sub(Operator):
277
307
  op = SUBSPACE
278
308
 
279
309
  @classmethod
280
- def validate(cls, operands: List[DataComponent], dataset: Dataset):
310
+ def validate(cls, operands: List[DataComponent], dataset: Dataset) -> Dataset:
281
311
  if len(dataset.get_identifiers()) < 1:
282
312
  raise SemanticError("1-3-27", op=cls.op)
283
313
  for operand in operands:
284
314
  if operand.name not in dataset.components:
285
- raise SemanticError("1-1-1-10", op=cls.op, comp_name=operand.name,
286
- dataset_name=dataset.name)
315
+ raise SemanticError(
316
+ "1-1-1-10", op=cls.op, comp_name=operand.name, dataset_name=dataset.name
317
+ )
287
318
  if operand.role != Role.IDENTIFIER:
288
- raise SemanticError("1-1-6-10", op=cls.op, operand=operand.name,
289
- dataset_name=dataset.name)
319
+ raise SemanticError(
320
+ "1-1-6-10", op=cls.op, operand=operand.name, dataset_name=dataset.name
321
+ )
290
322
  if isinstance(operand, Scalar):
291
323
  raise SemanticError("1-1-6-5", op=cls.op, name=operand.name)
292
324
 
293
- result_components = {name: comp for name, comp in dataset.components.items()
294
- if comp.name not in [operand.name for operand in operands]}
325
+ result_components = {
326
+ name: comp
327
+ for name, comp in dataset.components.items()
328
+ if comp.name not in [operand.name for operand in operands]
329
+ }
295
330
  return Dataset(name=dataset.name, components=result_components, data=None)
296
331
 
297
332
  @classmethod
298
- def evaluate(cls, operands: List[DataComponent], dataset: Dataset):
333
+ def evaluate(cls, operands: List[DataComponent], dataset: Dataset) -> Dataset:
299
334
  result_dataset = cls.validate(operands, dataset)
300
- result_dataset.data = dataset.data.copy()
335
+ result_dataset.data = copy(dataset.data) if dataset.data is not None else pd.DataFrame()
301
336
  operand_names = [operand.name for operand in operands]
302
- if len(dataset.data) > 0:
337
+ if dataset.data is not None and len(dataset.data) > 0:
303
338
  # Filter the Dataframe
304
339
  # by intersecting the indexes of the Data Component with True values
305
340
  true_indexes = set()
306
341
  is_first = True
307
342
  for operand in operands:
308
- if is_first:
309
- true_indexes = set(operand.data[operand.data == True].index)
310
- is_first = False
311
- else:
312
- true_indexes.intersection_update(set(operand.data[operand.data == True].index))
343
+ if operand.data is not None:
344
+ if is_first:
345
+ true_indexes = set(operand.data[operand.data == True].index)
346
+ is_first = False
347
+ else:
348
+ true_indexes.intersection_update(
349
+ set(operand.data[operand.data == True].index)
350
+ )
313
351
  result_dataset.data = result_dataset.data.iloc[list(true_indexes)]
314
352
  result_dataset.data = result_dataset.data.drop(columns=operand_names, axis=1)
315
353
  result_dataset.data = result_dataset.data.reset_index(drop=True)