vtlengine 1.1rc2-py3-none-any.whl → 1.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- vtlengine/API/_InternalApi.py +288 -29
- vtlengine/API/__init__.py +277 -70
- vtlengine/AST/ASTComment.py +56 -0
- vtlengine/AST/ASTConstructor.py +71 -18
- vtlengine/AST/ASTConstructorModules/Expr.py +197 -75
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +81 -38
- vtlengine/AST/ASTConstructorModules/Terminals.py +76 -31
- vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- vtlengine/AST/ASTEncoders.py +4 -0
- vtlengine/AST/ASTString.py +622 -0
- vtlengine/AST/ASTTemplate.py +28 -2
- vtlengine/AST/DAG/__init__.py +44 -6
- vtlengine/AST/DAG/_words.py +1 -0
- vtlengine/AST/Grammar/Vtl.g4 +7 -7
- vtlengine/AST/Grammar/lexer.py +19759 -1112
- vtlengine/AST/Grammar/parser.py +17996 -3199
- vtlengine/AST/__init__.py +127 -14
- vtlengine/Exceptions/messages.py +14 -2
- vtlengine/Interpreter/__init__.py +90 -11
- vtlengine/Model/__init__.py +9 -4
- vtlengine/Operators/Aggregation.py +13 -6
- vtlengine/Operators/Analytic.py +19 -13
- vtlengine/Operators/CastOperator.py +5 -2
- vtlengine/Operators/Clause.py +26 -18
- vtlengine/Operators/Comparison.py +3 -1
- vtlengine/Operators/Conditional.py +40 -18
- vtlengine/Operators/General.py +3 -1
- vtlengine/Operators/HROperators.py +3 -1
- vtlengine/Operators/Join.py +4 -2
- vtlengine/Operators/Time.py +22 -15
- vtlengine/Operators/Validation.py +5 -2
- vtlengine/Operators/__init__.py +15 -8
- vtlengine/Utils/__Virtual_Assets.py +34 -0
- vtlengine/Utils/__init__.py +49 -0
- vtlengine/__init__.py +4 -2
- vtlengine/files/parser/__init__.py +16 -26
- vtlengine/files/parser/_rfc_dialect.py +1 -1
- vtlengine/py.typed +0 -0
- vtlengine-1.2.0.dist-info/METADATA +92 -0
- vtlengine-1.2.0.dist-info/RECORD +63 -0
- {vtlengine-1.1rc2.dist-info → vtlengine-1.2.0.dist-info}/WHEEL +1 -1
- vtlengine-1.1rc2.dist-info/METADATA +0 -248
- vtlengine-1.1rc2.dist-info/RECORD +0 -59
- {vtlengine-1.1rc2.dist-info → vtlengine-1.2.0.dist-info}/LICENSE.md +0 -0
vtlengine/Operators/Conditional.py
CHANGED
@@ -19,6 +19,7 @@ from vtlengine.DataTypes import (
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import DataComponent, Dataset, Role, Scalar
 from vtlengine.Operators import Binary, Operator
+from vtlengine.Utils.__Virtual_Assets import VirtualCounter


 class If(Operator):
@@ -75,8 +76,8 @@ class If(Operator):
 ) -> Dataset:
 ids = condition.get_identifiers_names()
 condition_measure = condition.get_measures_names()[0]
-true_data = condition.data[condition.data[condition_measure] == True]
-false_data = condition.data[condition.data[condition_measure] != True]
+true_data = condition.data[condition.data[condition_measure].dropna() == True]
+false_data = condition.data[condition.data[condition_measure] != True]

 if isinstance(true_branch, Dataset):
 if len(true_data) > 0 and true_branch.data is not None:
@@ -84,7 +85,7 @@ class If(Operator):
 true_data,
 true_branch.data,
 on=ids,
-how="
+how="left",
 suffixes=("_condition", ""),
 )
 else:
@@ -99,7 +100,7 @@ class If(Operator):
 false_data,
 false_branch.data,
 on=ids,
-how="
+how="left",
 suffixes=("_condition", ""),
 )
 else:
@@ -113,12 +114,12 @@ class If(Operator):
 pd.concat([true_data, false_data], ignore_index=True)
 .drop_duplicates()
 .sort_values(by=ids)
-)
+).reset_index(drop=True)
 if isinstance(result, Dataset):
 drop_columns = [
 column for column in result.data.columns if column not in result.components
 ]
-result.data = result.data.
+result.data = result.data.drop(columns=drop_columns)
 if isinstance(true_branch, Scalar) and isinstance(false_branch, Scalar):
 result.get_measures()[0].data_type = true_branch.data_type
 result.get_measures()[0].name = COMP_NAME_MAPPING[true_branch.data_type]
@@ -135,6 +136,7 @@ class If(Operator):
 nullable = False
 left = true_branch
 right = false_branch
+dataset_name = VirtualCounter._new_ds_name()
 if true_branch.__class__ != false_branch.__class__:
 if (isinstance(true_branch, DataComponent) and isinstance(false_branch, Dataset)) or (
 isinstance(true_branch, Dataset) and isinstance(false_branch, DataComponent)
@@ -147,6 +149,7 @@ class If(Operator):
 right = true_branch

 # Datacomponent
+comp_name = VirtualCounter._new_dc_name()
 if isinstance(condition, DataComponent):
 if not condition.data_type == Boolean:
 raise SemanticError(
@@ -164,7 +167,7 @@ class If(Operator):
 if isinstance(right, DataComponent):
 nullable |= right.nullable
 return DataComponent(
-name=
+name=comp_name,
 data=None,
 data_type=binary_implicit_promotion(left.data_type, right.data_type),
 role=Role.MEASURE,
@@ -184,7 +187,7 @@ class If(Operator):
 left.data_type = right.data_type = binary_implicit_promotion(
 left.data_type, right.data_type
 )
-return Dataset(name=
+return Dataset(name=dataset_name, components=copy(condition.components), data=None)
 if left.get_identifiers() != condition.get_identifiers():
 raise SemanticError("1-1-9-10", op=cls.op, clause=left.name)
 if isinstance(right, Scalar):
@@ -218,7 +221,7 @@ class If(Operator):
 if left.get_identifiers() != condition.get_identifiers():
 raise SemanticError("1-1-9-6", op=cls.op)
 result_components = {comp_name: copy(comp) for comp_name, comp in left.components.items()}
-return Dataset(name=
+return Dataset(name=dataset_name, components=result_components, data=None)


 class Nvl(Binary):
@@ -238,7 +241,7 @@ class Nvl(Binary):
 result = cls.validate(left, right)

 if isinstance(left, Scalar) and isinstance(result, Scalar):
-if
+if left.data_type is Null:
 result.value = right.value
 else:
 result.value = left.value
@@ -254,6 +257,8 @@ class Nvl(Binary):

 @classmethod
 def validate(cls, left: Any, right: Any) -> Union[Scalar, DataComponent, Dataset]:
+dataset_name = VirtualCounter._new_ds_name()
+comp_name = VirtualCounter._new_dc_name()
 result_components = {}
 if isinstance(left, Scalar):
 if not isinstance(right, Scalar):
@@ -271,7 +276,7 @@ class Nvl(Binary):
 )
 cls.type_validation(left.data_type, right.data_type)
 return DataComponent(
-name=
+name=comp_name,
 data=pd.Series(dtype=object),
 data_type=left.data_type,
 role=Role.MEASURE,
@@ -298,7 +303,7 @@ class Nvl(Binary):
 }
 for comp in result_components.values():
 comp.nullable = False
-return Dataset(name=
+return Dataset(name=dataset_name, components=result_components, data=None)


 class Case(Operator):
@@ -308,8 +313,23 @@ class Case(Operator):
 ) -> Union[Scalar, DataComponent, Dataset]:
 result = cls.validate(conditions, thenOps, elseOp)
 for condition in conditions:
-if isinstance(condition,
+if isinstance(condition, Dataset) and condition.data is not None:
 condition.data.fillna(False, inplace=True)
+condition_measure = condition.get_measures_names()[0]
+if condition.data[condition_measure].dtype != bool:
+condition.data[condition_measure] = condition.data[condition_measure].astype(
+bool
+)
+elif (
+isinstance(
+condition,
+DataComponent,
+)
+and condition.data is not None
+):
+condition.data.fillna(False, inplace=True)
+if condition.data.dtype != bool:
+condition.data = condition.data.astype(bool)
 elif isinstance(condition, Scalar) and condition.value is None:
 condition.value = False

@@ -324,9 +344,9 @@ class Case(Operator):

 for i, condition in enumerate(conditions):
 value = thenOps[i].value if isinstance(thenOps[i], Scalar) else thenOps[i].data
-result.data = np.where(
+result.data = np.where(
 condition.data.notna(),
-np.where(condition.data, value, result.data),
+np.where(condition.data, value, result.data),
 result.data,
 )

@@ -366,7 +386,7 @@ class Case(Operator):
 ]
 )

-result.data.loc[condition_mask_else, columns] = (
+result.data.loc[condition_mask_else, columns] = ( # type: ignore[index, unused-ignore]
 elseOp.value
 if isinstance(elseOp, Scalar)
 else elseOp.data.loc[condition_mask_else, columns]
@@ -378,6 +398,8 @@ class Case(Operator):
 def validate(
 cls, conditions: List[Any], thenOps: List[Any], elseOp: Any
 ) -> Union[Scalar, DataComponent, Dataset]:
+dataset_name = VirtualCounter._new_ds_name()
+comp_name = VirtualCounter._new_dc_name()
 if len(set(map(type, conditions))) > 1:
 raise SemanticError("2-1-9-1", op=cls.op)

@@ -421,7 +443,7 @@ class Case(Operator):
 data_type = binary_implicit_promotion(data_type, op.data_type)

 return DataComponent(
-name=
+name=comp_name,
 data=None,
 data_type=data_type,
 role=Role.MEASURE,
@@ -444,4 +466,4 @@ class Case(Operator):
 if isinstance(op, Dataset) and op.get_components_names() != comp_names:
 raise SemanticError("2-1-9-7", op=cls.op)

-return Dataset(name=
+return Dataset(name=dataset_name, components=components, data=None)
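The Case.evaluate hunk above keeps the nested np.where pattern when applying each condition to the running result. A minimal illustrative sketch of how that pattern resolves a condition column that contains nulls; the names and values here are hypothetical, not taken from the package:

import numpy as np
import pandas as pd

condition = pd.Series([True, False, None])   # hypothetical boolean measure with a null
value = "then"                               # hypothetical thenOps[i] value
result = np.array(["else", "else", "else"])  # hypothetical running result

result = np.where(
    condition.notna(),                       # only rows where the condition is not null...
    np.where(condition, value, result),      # ...are decided by the condition itself
    result,                                  # null rows keep the existing result
)
print(result)  # ['then' 'else' 'else']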
vtlengine/Operators/General.py
CHANGED
@@ -7,6 +7,7 @@ from vtlengine.DataTypes import COMP_NAME_MAPPING
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, DataComponent, Dataset, ExternalRoutine, Role
 from vtlengine.Operators import Binary, Unary
+from vtlengine.Utils.__Virtual_Assets import VirtualCounter


 class Membership(Binary):
@@ -21,6 +22,7 @@ class Membership(Binary):

 @classmethod
 def validate(cls, left_operand: Any, right_operand: Any) -> Dataset:
+dataset_name = VirtualCounter._new_ds_name()
 if right_operand not in left_operand.components:
 raise SemanticError(
 "1-1-1-10",
@@ -46,7 +48,7 @@ class Membership(Binary):
 for name, comp in left_operand.components.items()
 if comp.role == Role.IDENTIFIER or comp.name == right_operand
 }
-result_dataset = Dataset(name=
+result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
 return result_dataset

 @classmethod
vtlengine/Operators/HROperators.py
CHANGED
@@ -9,6 +9,7 @@ import vtlengine.Operators as Operators
 from vtlengine.AST.Grammar.tokens import HIERARCHY
 from vtlengine.DataTypes import Boolean, Number
 from vtlengine.Model import Component, DataComponent, Dataset, Role
+from vtlengine.Utils.__Virtual_Assets import VirtualCounter


 def get_measure_from_dataset(dataset: Dataset, code_item: str) -> DataComponent:
@@ -221,10 +222,11 @@ class Hierarchy(Operators.Operator):
 def validate(
 cls, dataset: Dataset, computed_dict: Dict[str, DataFrame], output: str
 ) -> Dataset:
+dataset_name = VirtualCounter._new_ds_name()
 result_components = {
 comp_name: copy(comp) for comp_name, comp in dataset.components.items()
 }
-return Dataset(name=
+return Dataset(name=dataset_name, components=result_components, data=None)

 @classmethod
 def evaluate(
vtlengine/Operators/Join.py
CHANGED
@@ -13,6 +13,7 @@ from vtlengine.DataTypes import binary_implicit_promotion
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, Dataset, Role
 from vtlengine.Operators import Operator, _id_type_promotion_join_keys
+from vtlengine.Utils.__Virtual_Assets import VirtualCounter


 class Join(Operator):
@@ -196,12 +197,13 @@ class Join(Operator):

 @classmethod
 def validate(cls, operands: List[Dataset], using: Optional[List[str]]) -> Dataset:
+dataset_name = VirtualCounter._new_ds_name()
 if len(operands) < 1 or sum([isinstance(op, Dataset) for op in operands]) < 1:
 raise Exception("Join operator requires at least 1 dataset")
 if not all(isinstance(op, Dataset) for op in operands):
 raise SemanticError("1-1-13-10")
 if len(operands) == 1 and isinstance(operands[0], Dataset):
-return Dataset(name=
+return Dataset(name=dataset_name, components=operands[0].components, data=None)
 for op in operands:
 if len(op.get_identifiers()) == 0:
 raise SemanticError("1-3-27", op=cls.op)
@@ -215,7 +217,7 @@ class Join(Operator):
 if len(set(components.keys())) != len(components):
 raise SemanticError("1-1-13-9", comp_name="")

-return Dataset(name=
+return Dataset(name=dataset_name, components=components, data=None)

 @classmethod
 def identifiers_validation(cls, operands: List[Dataset], using: Optional[List[str]]) -> None:
vtlengine/Operators/Time.py
CHANGED
@@ -40,6 +40,7 @@ from vtlengine.DataTypes.TimeHandling import (
 )
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
+from vtlengine.Utils.__Virtual_Assets import VirtualCounter


 class Time(Operators.Operator):
@@ -57,14 +58,17 @@ class Time(Operators.Operator):
 op = FLOW_TO_STOCK

 @classmethod
-def _get_time_id(cls, operand: Dataset) ->
+def _get_time_id(cls, operand: Dataset) -> str:
 reference_id = None
+identifiers = operand.get_identifiers()
+if len(identifiers) == 0:
+raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
 for id in operand.get_identifiers():
 if id.data_type in cls.TIME_DATA_TYPES:
 if reference_id is not None:
 raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
 reference_id = id.name
-return reference_id
+return str(reference_id)

 @classmethod
 def sort_by_time(cls, operand: Dataset) -> Optional[pd.DataFrame]:
@@ -121,12 +125,13 @@ class Time(Operators.Operator):
 class Unary(Time):
 @classmethod
 def validate(cls, operand: Any) -> Any:
+dataset_name = VirtualCounter._new_ds_name()
 if not isinstance(operand, Dataset):
 raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
 if cls._get_time_id(operand) is None:
 raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
 operand.data = cls.sort_by_time(operand)
-return Dataset(name=
+return Dataset(name=dataset_name, components=operand.components.copy(), data=None)

 @classmethod
 def evaluate(cls, operand: Any) -> Any:
@@ -180,9 +185,10 @@ class Period_indicator(Unary):

 @classmethod
 def validate(cls, operand: Any) -> Any:
+dataset_name = VirtualCounter._new_ds_name()
 if isinstance(operand, Dataset):
 time_id = cls._get_time_id(operand)
-if
+if operand.components[time_id].data_type != TimePeriod:
 raise SemanticError("1-1-19-8", op=cls.op, comp_type="time period dataset")
 result_components = {
 comp.name: comp
@@ -195,7 +201,7 @@ class Period_indicator(Unary):
 role=Role.MEASURE,
 nullable=True,
 )
-return Dataset(name=
+return Dataset(name=dataset_name, components=result_components, data=None)
 # DataComponent and Scalar validation
 if operand.data_type != TimePeriod:
 raise SemanticError("1-1-19-8", op=cls.op, comp_type="time period component")
@@ -223,7 +229,7 @@ class Period_indicator(Unary):
 if (operand.data is not None)
 else pd.Series()
 )
-period_series: Any = result.data[cls.time_id].map(cls._get_period)
+period_series: Any = result.data[cls.time_id].map(cls._get_period)
 result.data["duration_var"] = period_series
 return result

@@ -286,6 +292,7 @@ class Fill_time_series(Binary):

 @classmethod
 def validate(cls, operand: Dataset, fill_type: str) -> Dataset:
+dataset_name = VirtualCounter._new_ds_name()
 if not isinstance(operand, Dataset):
 raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
 cls.time_id = cls._get_time_id(operand)
@@ -295,7 +302,7 @@ class Fill_time_series(Binary):
 raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
 if fill_type not in ["all", "single"]:
 fill_type = "all"
-return Dataset(name=
+return Dataset(name=dataset_name, components=operand.components.copy(), data=None)

 @classmethod
 def max_min_from_period(cls, data: pd.DataFrame, mode: str = "all") -> Dict[str, Any]:
@@ -378,7 +385,7 @@ class Fill_time_series(Binary):
 )

 filled_data = pd.concat(filled_data, ignore_index=True)
-combined_data = pd.concat([filled_data, data], ignore_index=True)
+combined_data = pd.concat([filled_data, data], ignore_index=True)
 if len(cls.periods) == 1 and cls.periods[0] == "A":
 combined_data[cls.time_id] = combined_data[cls.time_id].astype(int)
 else:
@@ -456,7 +463,7 @@ class Fill_time_series(Binary):

 filled_data = pd.concat(filled_data, ignore_index=True)
 filled_data[cls.time_id] = filled_data[cls.time_id].dt.strftime(date_format)
-combined_data = pd.concat([filled_data, data], ignore_index=True)
+combined_data = pd.concat([filled_data, data], ignore_index=True)
 combined_data[cls.time_id] = combined_data[cls.time_id].astype(str)
 return combined_data.sort_values(by=cls.other_ids + [cls.time_id])

@@ -544,9 +551,7 @@ class Time_Shift(Binary):
 shift_value = int(shift_value.value)
 cls.time_id = cls._get_time_id(result)

-data_type: Any =
-result.components[cls.time_id].data_type if isinstance(cls.time_id, str) else None
-)
+data_type: Any = result.components[cls.time_id].data_type

 if data_type == Date:
 freq = cls.find_min_frequency(
@@ -573,9 +578,10 @@ class Time_Shift(Binary):

 @classmethod
 def validate(cls, operand: Dataset, shift_value: str) -> Dataset:
+dataset_name = VirtualCounter._new_ds_name()
 if cls._get_time_id(operand) is None:
 raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
-return Dataset(name=
+return Dataset(name=dataset_name, components=operand.components.copy(), data=None)

 @classmethod
 def shift_dates(cls, dates: Any, shift_value: int, frequency: str) -> Any:
@@ -906,6 +912,7 @@ class Date_Add(Parametrized):
 def validate(
 cls, operand: Union[Scalar, DataComponent, Dataset], param_list: List[Scalar]
 ) -> Union[Scalar, DataComponent, Dataset]:
+dataset_name = VirtualCounter._new_ds_name()
 expected_types = [Integer, String]
 for i, param in enumerate(param_list):
 error = (
@@ -937,7 +944,7 @@ class Date_Add(Parametrized):

 if all(comp.data_type not in [Date, TimePeriod] for comp in operand.components.values()):
 raise SemanticError("2-1-19-14", op=cls.op, name=operand.name)
-return Dataset(name=
+return Dataset(name=dataset_name, components=operand.components.copy(), data=None)

 @classmethod
 def evaluate(
@@ -966,7 +973,7 @@ class Date_Add(Parametrized):
 for measure in operand.get_measures():
 if measure.data_type in [Date, TimePeriod]:
 result.data[measure.name] = result.data[measure.name].map(
-lambda x: cls.py_op(x, shift, period, measure.data_type == TimePeriod),
+lambda x: cls.py_op(str(x), shift, period, measure.data_type == TimePeriod),
 na_action="ignore",
 )
 measure.data_type = Date
vtlengine/Operators/Validation.py
CHANGED
@@ -14,6 +14,7 @@ from vtlengine.DataTypes import (
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, Dataset, Role
 from vtlengine.Operators import Operator
+from vtlengine.Utils.__Virtual_Assets import VirtualCounter


 # noinspection PyTypeChecker
@@ -29,6 +30,7 @@ class Check(Operator):
 error_level: Optional[int],
 invalid: bool,
 ) -> Dataset:
+dataset_name = VirtualCounter._new_ds_name()
 if len(validation_element.get_measures()) != 1:
 raise SemanticError("1-1-10-1", op=cls.op, op_type="validation", me_type="Boolean")
 measure = validation_element.get_measures()[0]
@@ -71,7 +73,7 @@ class Check(Operator):
 name="errorlevel", data_type=Integer, role=Role.MEASURE, nullable=True
 )

-return Dataset(name=
+return Dataset(name=dataset_name, components=result_components, data=None)

 @classmethod
 def evaluate(
@@ -126,6 +128,7 @@ class Validation(Operator):

 @classmethod
 def validate(cls, dataset_element: Dataset, rule_info: Dict[str, Any], output: str) -> Dataset:
+dataset_name = VirtualCounter._new_ds_name()
 result_components = {comp.name: comp for comp in dataset_element.get_identifiers()}
 result_components["ruleid"] = Component(
 name="ruleid", data_type=String, role=Role.IDENTIFIER, nullable=False
@@ -154,7 +157,7 @@ class Validation(Operator):
 name="errorlevel", data_type=Number, role=Role.MEASURE, nullable=True
 )

-return Dataset(name=
+return Dataset(name=dataset_name, components=result_components, data=None)

 @classmethod
 def evaluate(cls, dataset_element: Dataset, rule_info: Dict[str, Any], output: str) -> Dataset:
vtlengine/Operators/__init__.py
CHANGED
@@ -37,6 +37,7 @@ from vtlengine.DataTypes.TimeHandling import (
 )
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
+from vtlengine.Utils.__Virtual_Assets import VirtualCounter

 ALL_MODEL_DATA_TYPES = Union[Dataset, Scalar, DataComponent]

@@ -289,6 +290,7 @@ class Binary(Operator):

 @classmethod
 def dataset_validation(cls, left_operand: Dataset, right_operand: Dataset) -> Dataset:
+dataset_name = VirtualCounter._new_ds_name()
 left_identifiers = left_operand.get_identifiers_names()
 right_identifiers = right_operand.get_identifiers_names()

@@ -336,12 +338,13 @@ class Binary(Operator):
 right_comp = right_operand.components[comp.name]
 comp.nullable = left_comp.nullable or right_comp.nullable

-result_dataset = Dataset(name=
+result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
 cls.apply_return_type_dataset(result_dataset, left_operand, right_operand)
 return result_dataset

 @classmethod
 def dataset_scalar_validation(cls, dataset: Dataset, scalar: Scalar) -> Dataset:
+dataset_name = VirtualCounter._new_ds_name()
 if len(dataset.get_measures()) == 0:
 raise SemanticError("1-1-1-8", op=cls.op, name=dataset.name)

@@ -350,7 +353,7 @@ class Binary(Operator):
 for comp_name, comp in dataset.components.items()
 if comp.role in [Role.IDENTIFIER, Role.MEASURE]
 }
-result_dataset = Dataset(name=
+result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
 cls.apply_return_type_dataset(result_dataset, dataset, scalar)
 return result_dataset

@@ -379,10 +382,10 @@ class Binary(Operator):
 :param right_operand: The right component
 :return: The result data type of the validation
 """
-
+comp_name = VirtualCounter._new_dc_name()
 result_data_type = cls.type_validation(left_operand.data_type, right_operand.data_type)
 result = DataComponent(
-name=
+name=comp_name,
 data_type=result_data_type,
 data=None,
 role=left_operand.role,
@@ -405,6 +408,7 @@ class Binary(Operator):

 @classmethod
 def dataset_set_validation(cls, dataset: Dataset, scalar_set: ScalarSet) -> Dataset:
+dataset_name = VirtualCounter._new_ds_name()
 if len(dataset.get_measures()) == 0:
 raise SemanticError("1-1-1-8", op=cls.op, name=dataset.name)
 for measure in dataset.get_measures():
@@ -415,7 +419,7 @@ class Binary(Operator):
 if comp.role in [Role.IDENTIFIER, Role.MEASURE]
 }

-result_dataset = Dataset(name=
+result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
 cls.apply_return_type_dataset(result_dataset, dataset, scalar_set)
 return result_dataset

@@ -423,9 +427,10 @@ class Binary(Operator):
 def component_set_validation(
 cls, component: DataComponent, scalar_set: ScalarSet
 ) -> DataComponent:
+comp_name = VirtualCounter._new_dc_name()
 cls.type_validation(component.data_type, scalar_set.data_type)
 result = DataComponent(
-name=
+name=comp_name,
 data_type=cls.type_validation(component.data_type, scalar_set.data_type),
 data=None,
 role=Role.MEASURE,
@@ -757,6 +762,7 @@ class Unary(Operator):

 @classmethod
 def dataset_validation(cls, operand: Dataset) -> Dataset:
+dataset_name = VirtualCounter._new_ds_name()
 cls.validate_dataset_type(operand)
 if len(operand.get_measures()) == 0:
 raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
@@ -766,7 +772,7 @@ class Unary(Operator):
 if comp.role in [Role.IDENTIFIER, Role.MEASURE]
 }

-result_dataset = Dataset(name=
+result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
 cls.apply_return_type_dataset(result_dataset, operand)
 return result_dataset

@@ -778,9 +784,10 @@ class Unary(Operator):

 @classmethod
 def component_validation(cls, operand: DataComponent) -> DataComponent:
+comp_name = VirtualCounter._new_dc_name()
 result_type = cls.type_validation(operand.data_type)
 result = DataComponent(
-name=
+name=comp_name,
 data_type=result_type,
 data=None,
 role=operand.role,
vtlengine/Utils/__Virtual_Assets.py
ADDED
@@ -0,0 +1,34 @@
+from copy import copy
+
+
+class VirtualCounter:
+    _instance = None
+    dataset_count: int = 0
+    component_count: int = 0
+
+    def __init__(self) -> None:
+        self.dataset_count = 0
+        self.component_count = 0
+
+    def __new__(cls):  # type: ignore[no-untyped-def]
+        if cls._instance is None:
+            cls._instance = super(VirtualCounter, cls).__new__(cls)
+            cls._instance.reset()
+        return cls._instance
+
+    @classmethod
+    def reset(cls) -> None:
+        cls.dataset_count = 0
+        cls.component_count = 0
+
+    @classmethod
+    def _new_ds_name(cls) -> str:
+        cls.dataset_count += 1
+        name = f"@VDS_{copy(cls.dataset_count)}"
+        return name
+
+    @classmethod
+    def _new_dc_name(cls) -> str:
+        cls.component_count += 1
+        name = f"@VDC_{copy(cls.component_count)}"
+        return name
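Based only on the code shown above, a minimal sketch of how the new VirtualCounter behaves: the class-level counters hand out sequential names for virtual (intermediate) datasets and components, and reset() restarts the numbering. The methods are underscore-prefixed, so they read as internal helpers rather than public API.

from vtlengine.Utils.__Virtual_Assets import VirtualCounter

print(VirtualCounter._new_ds_name())  # "@VDS_1"
print(VirtualCounter._new_ds_name())  # "@VDS_2"
print(VirtualCounter._new_dc_name())  # "@VDC_1"

VirtualCounter.reset()                # counters back to zero
print(VirtualCounter._new_ds_name())  # "@VDS_1" again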
vtlengine/Utils/__init__.py
CHANGED
@@ -1,5 +1,7 @@
 from typing import Any, Dict

+from pysdmx.model.dataflow import Role
+
 from vtlengine.AST.Grammar.tokens import (
 ABS,
 AGGREGATE,
@@ -428,3 +430,50 @@ HA_UNARY_MAPPING = {
 PLUS: HRUnPlus,
 MINUS: HRUnMinus,
 }
+VTL_DTYPES_MAPPING = {
+    "String": "String",
+    "Alpha": "String",
+    "AlphaNumeric": "String",
+    "Numeric": "String",
+    "BigInteger": "Integer",
+    "Integer": "Integer",
+    "Long": "Integer",
+    "Short": "Integer",
+    "Decimal": "Number",
+    "Float": "Number",
+    "Double": "Number",
+    "Boolean": "Boolean",
+    "URI": "String",
+    "Count": "Integer",
+    "InclusiveValueRange": "Number",
+    "ExclusiveValueRange": "Number",
+    "Incremental": "Number",
+    "ObservationalTimePeriod": "Time_Period",
+    "StandardTimePeriod": "Time_Period",
+    "BasicTimePeriod": "Date",
+    "GregorianTimePeriod": "Date",
+    "GregorianYear": "Date",
+    "GregorianYearMonth": "Date",
+    "GregorianMonth": "Date",
+    "GregorianDay": "Date",
+    "ReportingTimePeriod": "Time_Period",
+    "ReportingYear": "Time_Period",
+    "ReportingSemester": "Time_Period",
+    "ReportingTrimester": "Time_Period",
+    "ReportingQuarter": "Time_Period",
+    "ReportingMonth": "Time_Period",
+    "ReportingWeek": "Time_Period",
+    "ReportingDay": "Time_Period",
+    "DateTime": "Date",
+    "TimeRange": "Time",
+    "Month": "String",
+    "MonthDay": "String",
+    "Day": "String",
+    "Time": "String",
+    "Duration": "Duration",
+}
+VTL_ROLE_MAPPING = {
+    Role.DIMENSION: "Identifier",
+    Role.MEASURE: "Measure",
+    Role.ATTRIBUTE: "Attribute",
+}