vtlengine 1.1.1__py3-none-any.whl → 1.2.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +62 -28
- vtlengine/API/__init__.py +25 -9
- vtlengine/AST/ASTConstructorModules/Expr.py +6 -3
- vtlengine/AST/DAG/__init__.py +34 -5
- vtlengine/AST/DAG/_words.py +1 -0
- vtlengine/AST/Grammar/Vtl.g4 +7 -7
- vtlengine/AST/Grammar/lexer.py +19759 -1112
- vtlengine/AST/Grammar/parser.py +17996 -3199
- vtlengine/Exceptions/messages.py +5 -2
- vtlengine/Interpreter/__init__.py +50 -7
- vtlengine/Operators/Aggregation.py +8 -3
- vtlengine/Operators/Analytic.py +3 -2
- vtlengine/Operators/CastOperator.py +5 -2
- vtlengine/Operators/Clause.py +26 -18
- vtlengine/Operators/Comparison.py +3 -1
- vtlengine/Operators/Conditional.py +35 -26
- vtlengine/Operators/General.py +3 -1
- vtlengine/Operators/HROperators.py +3 -1
- vtlengine/Operators/Join.py +9 -2
- vtlengine/Operators/Time.py +11 -5
- vtlengine/Operators/Validation.py +5 -2
- vtlengine/Operators/__init__.py +15 -8
- vtlengine/Utils/__Virtual_Assets.py +34 -0
- vtlengine/__init__.py +1 -1
- {vtlengine-1.1.1.dist-info → vtlengine-1.2.1rc1.dist-info}/METADATA +4 -4
- {vtlengine-1.1.1.dist-info → vtlengine-1.2.1rc1.dist-info}/RECORD +28 -27
- {vtlengine-1.1.1.dist-info → vtlengine-1.2.1rc1.dist-info}/LICENSE.md +0 -0
- {vtlengine-1.1.1.dist-info → vtlengine-1.2.1rc1.dist-info}/WHEEL +0 -0
vtlengine/Exceptions/messages.py
CHANGED
|
@@ -9,6 +9,8 @@ All exceptions exposed by the Vtl engine.
|
|
|
9
9
|
|
|
10
10
|
centralised_messages = {
|
|
11
11
|
# Input Validation errors
|
|
12
|
+
"0-1-1-1": "invalid script format type: {format_}. Input must be a string, "
|
|
13
|
+
"TransformationScheme or Path object",
|
|
12
14
|
"0-1-2-1": "Invalid json structure because additional properties have been supplied "
|
|
13
15
|
"on file {filename}.",
|
|
14
16
|
"0-1-2-2": "Errors found on file {filename}: {errors}",
|
|
@@ -22,6 +24,7 @@ centralised_messages = {
|
|
|
22
24
|
"0-1-3-4": "Dataset {short_urn} not found in mapping dictionary.",
|
|
23
25
|
"0-1-3-5": "Dataset {dataset_name} not found in the input datasets.",
|
|
24
26
|
"0-1-3-6": "Input name {missing} not found in the input datasets.",
|
|
27
|
+
"0-1-3-7": "Invalid input datasets type: {type_}. Expected a sequence of PandasDataset.",
|
|
25
28
|
# JSON Schema validations
|
|
26
29
|
"0-3-1-1": "Dataset {dataset} is not valid according to JSON schema",
|
|
27
30
|
# Infer Data Structure errors
|
|
@@ -241,8 +244,8 @@ centralised_messages = {
|
|
|
241
244
|
"2-3-1": "{comp_type} {comp_name} not found.",
|
|
242
245
|
"2-3-2": "{op_type} cannot be used with {node_op} operators.",
|
|
243
246
|
"2-3-4": "{op} operator must have a {comp}",
|
|
244
|
-
"2-3-5": "Expected {param_type}, got {type_name} on UDO {op}, parameter {param_name}",
|
|
245
|
-
"2-3-6": "Dataset {dataset_name} not found, please check input datastructures",
|
|
247
|
+
"2-3-5": "Expected {param_type}, got {type_name} on UDO {op}, parameter {param_name}.",
|
|
248
|
+
"2-3-6": "Dataset or scalar {dataset_name} not found, please check input datastructures.",
|
|
246
249
|
"2-3-9": "{comp_type} {comp_name} not found in {param}.",
|
|
247
250
|
"2-3-10": "No {comp_type} have been defined.",
|
|
248
251
|
"2-3-11": "{pos} operand must be a dataset.",
|
|
@@ -97,6 +97,7 @@ from vtlengine.Utils import (
|
|
|
97
97
|
THEN_ELSE,
|
|
98
98
|
UNARY_MAPPING,
|
|
99
99
|
)
|
|
100
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
100
101
|
|
|
101
102
|
|
|
102
103
|
# noinspection PyTypeChecker
|
|
@@ -104,6 +105,7 @@ from vtlengine.Utils import (
|
|
|
104
105
|
class InterpreterAnalyzer(ASTTemplate):
|
|
105
106
|
# Model elements
|
|
106
107
|
datasets: Dict[str, Dataset]
|
|
108
|
+
scalars: Optional[Dict[str, Scalar]] = None
|
|
107
109
|
value_domains: Optional[Dict[str, ValueDomain]] = None
|
|
108
110
|
external_routines: Optional[Dict[str, ExternalRoutine]] = None
|
|
109
111
|
# Analysis mode
|
|
@@ -202,6 +204,15 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
202
204
|
)
|
|
203
205
|
self.datasets[ds_name].data = None
|
|
204
206
|
|
|
207
|
+
def _save_scalars_efficient(self, scalars: Dict[str, Scalar]) -> None:
|
|
208
|
+
output_path = Path(self.output_path) # type: ignore[arg-type]
|
|
209
|
+
output_path.mkdir(parents=True, exist_ok=True)
|
|
210
|
+
|
|
211
|
+
for name, scalar in scalars.items():
|
|
212
|
+
file_path = output_path / f"{name}.csv"
|
|
213
|
+
df = pd.DataFrame([[scalar.value]] if scalar.value is not None else [[]])
|
|
214
|
+
df.to_csv(file_path, header=False, index=False)
|
|
215
|
+
|
|
205
216
|
# **********************************
|
|
206
217
|
# * *
|
|
207
218
|
# * AST Visitors *
|
|
@@ -215,6 +226,7 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
215
226
|
else:
|
|
216
227
|
Operators.only_semantic = False
|
|
217
228
|
results = {}
|
|
229
|
+
scalars_to_save = set()
|
|
218
230
|
for child in node.children:
|
|
219
231
|
if isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
|
|
220
232
|
vtlengine.Exceptions.dataset_output = child.left.value # type: ignore[attr-defined]
|
|
@@ -232,6 +244,9 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
232
244
|
self.else_condition_dataset = None
|
|
233
245
|
self.nested_condition = False
|
|
234
246
|
|
|
247
|
+
# Reset VirtualCounter
|
|
248
|
+
VirtualCounter.reset()
|
|
249
|
+
|
|
235
250
|
if result is None:
|
|
236
251
|
continue
|
|
237
252
|
|
|
@@ -240,9 +255,22 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
240
255
|
# Save results
|
|
241
256
|
self.datasets[result.name] = copy(result)
|
|
242
257
|
results[result.name] = result
|
|
258
|
+
if isinstance(result, Scalar):
|
|
259
|
+
scalars_to_save.add(result.name)
|
|
260
|
+
if self.scalars is None:
|
|
261
|
+
self.scalars = {}
|
|
262
|
+
self.scalars[result.name] = copy(result)
|
|
243
263
|
self._save_datapoints_efficient(statement_num)
|
|
244
264
|
statement_num += 1
|
|
245
265
|
|
|
266
|
+
if self.output_path is not None and scalars_to_save:
|
|
267
|
+
scalars_filtered = {
|
|
268
|
+
name: self.scalars[name] # type: ignore[index]
|
|
269
|
+
for name in scalars_to_save
|
|
270
|
+
if (not self.return_only_persistent or name in self.ds_analysis.get(PERSISTENT, [])) # type: ignore[union-attr]
|
|
271
|
+
}
|
|
272
|
+
self._save_scalars_efficient(scalars_filtered)
|
|
273
|
+
|
|
246
274
|
return results
|
|
247
275
|
|
|
248
276
|
# Definition Language
|
|
@@ -529,7 +557,10 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
529
557
|
# Setting here group by as we have already selected the identifiers we need
|
|
530
558
|
grouping_op = "group by"
|
|
531
559
|
|
|
532
|
-
|
|
560
|
+
result = AGGREGATION_MAPPING[node.op].analyze(operand, grouping_op, groupings, having)
|
|
561
|
+
if not self.is_from_regular_aggregation:
|
|
562
|
+
result.name = VirtualCounter._new_ds_name()
|
|
563
|
+
return result
|
|
533
564
|
|
|
534
565
|
def _format_having_expression_udo(self, having: str) -> str:
|
|
535
566
|
if self.udo_params is None:
|
|
@@ -560,16 +591,25 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
560
591
|
else:
|
|
561
592
|
operand_comp = self.visit(node.operand)
|
|
562
593
|
component_name = operand_comp.name
|
|
594
|
+
id_names = self.regular_aggregation_dataset.get_identifiers_names()
|
|
563
595
|
measure_names = self.regular_aggregation_dataset.get_measures_names()
|
|
596
|
+
attribute_names = self.regular_aggregation_dataset.get_attributes_names()
|
|
564
597
|
dataset_components = self.regular_aggregation_dataset.components.copy()
|
|
565
|
-
for name in measure_names:
|
|
566
|
-
|
|
567
|
-
|
|
598
|
+
for name in measure_names + attribute_names:
|
|
599
|
+
dataset_components.pop(name)
|
|
600
|
+
|
|
601
|
+
dataset_components[operand_comp.name] = Component(
|
|
602
|
+
name=operand_comp.name,
|
|
603
|
+
data_type=operand_comp.data_type,
|
|
604
|
+
role=operand_comp.role,
|
|
605
|
+
nullable=operand_comp.nullable,
|
|
606
|
+
)
|
|
568
607
|
|
|
569
608
|
if self.only_semantic or self.regular_aggregation_dataset.data is None:
|
|
570
609
|
data = None
|
|
571
610
|
else:
|
|
572
|
-
data = self.regular_aggregation_dataset.data[
|
|
611
|
+
data = self.regular_aggregation_dataset.data[id_names].copy()
|
|
612
|
+
data[operand_comp.name] = operand_comp.data
|
|
573
613
|
|
|
574
614
|
operand = Dataset(
|
|
575
615
|
name=self.regular_aggregation_dataset.name,
|
|
@@ -765,10 +805,10 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
765
805
|
if self.is_from_join and node.value in self.datasets:
|
|
766
806
|
return self.datasets[node.value]
|
|
767
807
|
if self.regular_aggregation_dataset is not None:
|
|
768
|
-
if
|
|
808
|
+
if self.scalars is not None and node.value in self.scalars:
|
|
769
809
|
if node.value in self.regular_aggregation_dataset.components:
|
|
770
810
|
raise SemanticError("1-1-6-11", comp_name=node.value)
|
|
771
|
-
return self.
|
|
811
|
+
return self.scalars[node.value]
|
|
772
812
|
if self.regular_aggregation_dataset.data is not None:
|
|
773
813
|
if (
|
|
774
814
|
self.is_from_join
|
|
@@ -833,8 +873,11 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
833
873
|
role=self.ruleset_dataset.components[comp_name].role,
|
|
834
874
|
nullable=self.ruleset_dataset.components[comp_name].nullable,
|
|
835
875
|
)
|
|
876
|
+
if self.scalars and node.value in self.scalars:
|
|
877
|
+
return self.scalars[node.value]
|
|
836
878
|
if node.value not in self.datasets:
|
|
837
879
|
raise SemanticError("2-3-6", dataset_name=node.value)
|
|
880
|
+
|
|
838
881
|
return self.datasets[node.value]
|
|
839
882
|
|
|
840
883
|
def visit_Collection(self, node: AST.Collection) -> Any:
|
|
@@ -103,12 +103,12 @@ class Aggregation(Operator.Unary):
|
|
|
103
103
|
elif measure.data_type == Duration:
|
|
104
104
|
if mode == "input":
|
|
105
105
|
data[measure.name] = data[measure.name].map(
|
|
106
|
-
lambda x: PERIOD_IND_MAPPING[x],
|
|
106
|
+
lambda x: PERIOD_IND_MAPPING[x],
|
|
107
107
|
na_action="ignore",
|
|
108
108
|
)
|
|
109
109
|
else:
|
|
110
110
|
data[measure.name] = data[measure.name].map(
|
|
111
|
-
lambda x: PERIOD_IND_MAPPING_REVERSE[x],
|
|
111
|
+
lambda x: PERIOD_IND_MAPPING_REVERSE[x],
|
|
112
112
|
na_action="ignore",
|
|
113
113
|
)
|
|
114
114
|
elif measure.data_type == Boolean:
|
|
@@ -170,9 +170,14 @@ class Aggregation(Operator.Unary):
|
|
|
170
170
|
for measure_name in operand.get_measures_names():
|
|
171
171
|
result_components.pop(measure_name)
|
|
172
172
|
new_comp = Component(
|
|
173
|
-
name="int_var",
|
|
173
|
+
name="int_var",
|
|
174
|
+
role=Role.MEASURE,
|
|
175
|
+
data_type=Integer,
|
|
176
|
+
nullable=True,
|
|
174
177
|
)
|
|
175
178
|
result_components["int_var"] = new_comp
|
|
179
|
+
|
|
180
|
+
# VDS is handled in visit_Aggregation
|
|
176
181
|
return Dataset(name="result", components=result_components, data=None)
|
|
177
182
|
|
|
178
183
|
@classmethod
|
vtlengine/Operators/Analytic.py
CHANGED
|
@@ -37,6 +37,7 @@ from vtlengine.DataTypes import (
|
|
|
37
37
|
)
|
|
38
38
|
from vtlengine.Exceptions import SemanticError
|
|
39
39
|
from vtlengine.Model import Component, Dataset, Role
|
|
40
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
40
41
|
|
|
41
42
|
return_integer_operators = [MAX, MIN, SUM]
|
|
42
43
|
|
|
@@ -157,8 +158,8 @@ class Analytic(Operator.Unary):
|
|
|
157
158
|
role=Role.MEASURE,
|
|
158
159
|
nullable=nullable,
|
|
159
160
|
)
|
|
160
|
-
|
|
161
|
-
return Dataset(name=
|
|
161
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
162
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
162
163
|
|
|
163
164
|
@classmethod
|
|
164
165
|
def analyticfunc(
|
|
@@ -22,6 +22,7 @@ from vtlengine.DataTypes import (
|
|
|
22
22
|
from vtlengine.DataTypes.TimeHandling import str_period_to_date
|
|
23
23
|
from vtlengine.Exceptions import SemanticError
|
|
24
24
|
from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
|
|
25
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
25
26
|
|
|
26
27
|
duration_mapping = {"A": 6, "S": 5, "Q": 4, "M": 3, "W": 2, "D": 1}
|
|
27
28
|
|
|
@@ -331,7 +332,8 @@ class Cast(Operator.Unary):
|
|
|
331
332
|
role=Role.MEASURE,
|
|
332
333
|
nullable=measure.nullable,
|
|
333
334
|
)
|
|
334
|
-
|
|
335
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
336
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
335
337
|
|
|
336
338
|
@classmethod
|
|
337
339
|
def component_validation( # type: ignore[override]
|
|
@@ -346,7 +348,8 @@ class Cast(Operator.Unary):
|
|
|
346
348
|
|
|
347
349
|
from_type = operand.data_type
|
|
348
350
|
cls.check_cast(from_type, to_type, mask)
|
|
349
|
-
|
|
351
|
+
comp_name = VirtualCounter._new_dc_name()
|
|
352
|
+
return DataComponent(name=comp_name, data=None, data_type=to_type, role=operand.role)
|
|
350
353
|
|
|
351
354
|
@classmethod
|
|
352
355
|
def scalar_validation( # type: ignore[override]
|
vtlengine/Operators/Clause.py
CHANGED
|
@@ -15,6 +15,7 @@ from vtlengine.DataTypes import (
|
|
|
15
15
|
from vtlengine.Exceptions import SemanticError
|
|
16
16
|
from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
|
|
17
17
|
from vtlengine.Operators import Operator
|
|
18
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
class Calc(Operator):
|
|
@@ -23,7 +24,8 @@ class Calc(Operator):
|
|
|
23
24
|
@classmethod
|
|
24
25
|
def validate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
|
|
25
26
|
result_components = {name: copy(comp) for name, comp in dataset.components.items()}
|
|
26
|
-
|
|
27
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
28
|
+
result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
|
|
27
29
|
|
|
28
30
|
for operand in operands:
|
|
29
31
|
if operand.name in result_dataset.components:
|
|
@@ -70,7 +72,8 @@ class Aggregate(Operator):
|
|
|
70
72
|
|
|
71
73
|
@classmethod
|
|
72
74
|
def validate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
|
|
73
|
-
|
|
75
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
76
|
+
result_dataset = Dataset(name=dataset_name, components=dataset.components, data=None)
|
|
74
77
|
|
|
75
78
|
for operand in operands:
|
|
76
79
|
if operand.name in dataset.get_identifiers_names() or (
|
|
@@ -122,7 +125,8 @@ class Filter(Operator):
|
|
|
122
125
|
def validate(cls, condition: DataComponent, dataset: Dataset) -> Dataset:
|
|
123
126
|
if condition.data_type != Boolean:
|
|
124
127
|
raise ValueError(f"Filter condition must be of type {Boolean}")
|
|
125
|
-
|
|
128
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
129
|
+
return Dataset(name=dataset_name, components=dataset.components, data=None)
|
|
126
130
|
|
|
127
131
|
@classmethod
|
|
128
132
|
def evaluate(cls, condition: DataComponent, dataset: Dataset) -> Dataset:
|
|
@@ -139,19 +143,20 @@ class Keep(Operator):
|
|
|
139
143
|
|
|
140
144
|
@classmethod
|
|
141
145
|
def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
|
|
146
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
142
147
|
for operand in operands:
|
|
143
148
|
if operand not in dataset.get_components_names():
|
|
144
149
|
raise SemanticError(
|
|
145
|
-
"1-1-1-10", op=cls.op, comp_name=operand, dataset_name=
|
|
150
|
+
"1-1-1-10", op=cls.op, comp_name=operand, dataset_name=dataset_name
|
|
146
151
|
)
|
|
147
152
|
if dataset.get_component(operand).role == Role.IDENTIFIER:
|
|
148
|
-
raise SemanticError("1-1-6-2", op=cls.op, name=operand, dataset=
|
|
153
|
+
raise SemanticError("1-1-6-2", op=cls.op, name=operand, dataset=dataset_name)
|
|
149
154
|
result_components = {
|
|
150
155
|
name: comp
|
|
151
156
|
for name, comp in dataset.components.items()
|
|
152
157
|
if comp.name in operands or comp.role == Role.IDENTIFIER
|
|
153
158
|
}
|
|
154
|
-
return Dataset(name=
|
|
159
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
155
160
|
|
|
156
161
|
@classmethod
|
|
157
162
|
def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
|
|
@@ -170,17 +175,18 @@ class Drop(Operator):
|
|
|
170
175
|
|
|
171
176
|
@classmethod
|
|
172
177
|
def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
|
|
178
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
173
179
|
for operand in operands:
|
|
174
180
|
if operand not in dataset.components:
|
|
175
|
-
raise SemanticError("1-1-1-10", comp_name=operand, dataset_name=
|
|
181
|
+
raise SemanticError("1-1-1-10", comp_name=operand, dataset_name=dataset_name)
|
|
176
182
|
if dataset.get_component(operand).role == Role.IDENTIFIER:
|
|
177
|
-
raise SemanticError("1-1-6-2", op=cls.op, name=operand, dataset=
|
|
183
|
+
raise SemanticError("1-1-6-2", op=cls.op, name=operand, dataset=dataset_name)
|
|
178
184
|
if len(dataset.components) == len(operands):
|
|
179
185
|
raise SemanticError("1-1-6-12", op=cls.op)
|
|
180
186
|
result_components = {
|
|
181
187
|
name: comp for name, comp in dataset.components.items() if comp.name not in operands
|
|
182
188
|
}
|
|
183
|
-
return Dataset(name=
|
|
189
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
184
190
|
|
|
185
191
|
@classmethod
|
|
186
192
|
def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
|
|
@@ -195,6 +201,7 @@ class Rename(Operator):
|
|
|
195
201
|
|
|
196
202
|
@classmethod
|
|
197
203
|
def validate(cls, operands: List[RenameNode], dataset: Dataset) -> Dataset:
|
|
204
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
198
205
|
from_names = [operand.old_name for operand in operands]
|
|
199
206
|
if len(from_names) != len(set(from_names)):
|
|
200
207
|
duplicates = set([name for name in from_names if from_names.count(name) > 1])
|
|
@@ -211,14 +218,14 @@ class Rename(Operator):
|
|
|
211
218
|
"1-1-1-10",
|
|
212
219
|
op=cls.op,
|
|
213
220
|
comp_name=operand.old_name,
|
|
214
|
-
dataset_name=
|
|
221
|
+
dataset_name=dataset_name,
|
|
215
222
|
)
|
|
216
223
|
if operand.new_name in dataset.components:
|
|
217
224
|
raise SemanticError(
|
|
218
225
|
"1-1-6-8",
|
|
219
226
|
op=cls.op,
|
|
220
227
|
comp_name=operand.new_name,
|
|
221
|
-
dataset_name=
|
|
228
|
+
dataset_name=dataset_name,
|
|
222
229
|
)
|
|
223
230
|
|
|
224
231
|
result_components = {comp.name: comp for comp in dataset.components.values()}
|
|
@@ -230,8 +237,7 @@ class Rename(Operator):
|
|
|
230
237
|
nullable=result_components[operand.old_name].nullable,
|
|
231
238
|
)
|
|
232
239
|
del result_components[operand.old_name]
|
|
233
|
-
|
|
234
|
-
return Dataset(name=dataset.name, components=result_components, data=None)
|
|
240
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
235
241
|
|
|
236
242
|
@classmethod
|
|
237
243
|
def evaluate(cls, operands: List[RenameNode], dataset: Dataset) -> Dataset:
|
|
@@ -256,6 +262,7 @@ class Pivot(Operator):
|
|
|
256
262
|
class Unpivot(Operator):
|
|
257
263
|
@classmethod
|
|
258
264
|
def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
|
|
265
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
259
266
|
if len(operands) != 2:
|
|
260
267
|
raise ValueError("Unpivot clause requires two operands")
|
|
261
268
|
identifier, measure = operands
|
|
@@ -263,10 +270,10 @@ class Unpivot(Operator):
|
|
|
263
270
|
if len(dataset.get_identifiers()) < 1:
|
|
264
271
|
raise SemanticError("1-3-27", op=cls.op)
|
|
265
272
|
if identifier in dataset.components:
|
|
266
|
-
raise SemanticError("1-1-6-2", op=cls.op, name=identifier, dataset=
|
|
273
|
+
raise SemanticError("1-1-6-2", op=cls.op, name=identifier, dataset=dataset_name)
|
|
267
274
|
|
|
268
275
|
result_components = {comp.name: comp for comp in dataset.get_identifiers()}
|
|
269
|
-
result_dataset = Dataset(name=
|
|
276
|
+
result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
|
|
270
277
|
# noinspection PyTypeChecker
|
|
271
278
|
result_dataset.add_component(
|
|
272
279
|
Component(name=identifier, data_type=String, role=Role.IDENTIFIER, nullable=False)
|
|
@@ -306,6 +313,7 @@ class Sub(Operator):
|
|
|
306
313
|
|
|
307
314
|
@classmethod
|
|
308
315
|
def validate(cls, operands: List[DataComponent], dataset: Dataset) -> Dataset:
|
|
316
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
309
317
|
if len(dataset.get_identifiers()) < 1:
|
|
310
318
|
raise SemanticError("1-3-27", op=cls.op)
|
|
311
319
|
for operand in operands:
|
|
@@ -314,14 +322,14 @@ class Sub(Operator):
|
|
|
314
322
|
"1-1-1-10",
|
|
315
323
|
op=cls.op,
|
|
316
324
|
comp_name=operand.name,
|
|
317
|
-
dataset_name=
|
|
325
|
+
dataset_name=dataset_name,
|
|
318
326
|
)
|
|
319
327
|
if operand.role != Role.IDENTIFIER:
|
|
320
328
|
raise SemanticError(
|
|
321
329
|
"1-1-6-10",
|
|
322
330
|
op=cls.op,
|
|
323
331
|
operand=operand.name,
|
|
324
|
-
dataset_name=
|
|
332
|
+
dataset_name=dataset_name,
|
|
325
333
|
)
|
|
326
334
|
if isinstance(operand, Scalar):
|
|
327
335
|
raise SemanticError("1-1-6-5", op=cls.op, name=operand.name)
|
|
@@ -331,7 +339,7 @@ class Sub(Operator):
|
|
|
331
339
|
for name, comp in dataset.components.items()
|
|
332
340
|
if comp.name not in [operand.name for operand in operands]
|
|
333
341
|
}
|
|
334
|
-
return Dataset(name=
|
|
342
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
335
343
|
|
|
336
344
|
@classmethod
|
|
337
345
|
def evaluate(cls, operands: List[DataComponent], dataset: Dataset) -> Dataset:
|
|
@@ -25,6 +25,7 @@ from vtlengine.AST.Grammar.tokens import (
|
|
|
25
25
|
from vtlengine.DataTypes import COMP_NAME_MAPPING, Boolean, Null, Number, String
|
|
26
26
|
from vtlengine.Exceptions import SemanticError
|
|
27
27
|
from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
|
|
28
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
class Unary(Operator.Unary):
|
|
@@ -403,6 +404,7 @@ class ExistIn(Operator.Operator):
|
|
|
403
404
|
def validate(
|
|
404
405
|
cls, dataset_1: Dataset, dataset_2: Dataset, retain_element: Optional[Boolean]
|
|
405
406
|
) -> Any:
|
|
407
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
406
408
|
left_identifiers = dataset_1.get_identifiers_names()
|
|
407
409
|
right_identifiers = dataset_2.get_identifiers_names()
|
|
408
410
|
|
|
@@ -412,7 +414,7 @@ class ExistIn(Operator.Operator):
|
|
|
412
414
|
raise ValueError("Datasets must have common identifiers")
|
|
413
415
|
|
|
414
416
|
result_components = {comp.name: copy(comp) for comp in dataset_1.get_identifiers()}
|
|
415
|
-
result_dataset = Dataset(name=
|
|
417
|
+
result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
|
|
416
418
|
result_dataset.add_component(
|
|
417
419
|
Component(name="bool_var", data_type=Boolean, role=Role.MEASURE, nullable=False)
|
|
418
420
|
)
|
|
@@ -19,6 +19,7 @@ from vtlengine.DataTypes import (
|
|
|
19
19
|
from vtlengine.Exceptions import SemanticError
|
|
20
20
|
from vtlengine.Model import DataComponent, Dataset, Role, Scalar
|
|
21
21
|
from vtlengine.Operators import Binary, Operator
|
|
22
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
22
23
|
|
|
23
24
|
|
|
24
25
|
class If(Operator):
|
|
@@ -75,8 +76,8 @@ class If(Operator):
|
|
|
75
76
|
) -> Dataset:
|
|
76
77
|
ids = condition.get_identifiers_names()
|
|
77
78
|
condition_measure = condition.get_measures_names()[0]
|
|
78
|
-
true_data = condition.data[condition.data[condition_measure] == True]
|
|
79
|
-
false_data = condition.data[condition.data[condition_measure] != True]
|
|
79
|
+
true_data = condition.data[condition.data[condition_measure].dropna() == True]
|
|
80
|
+
false_data = condition.data[condition.data[condition_measure] != True]
|
|
80
81
|
|
|
81
82
|
if isinstance(true_branch, Dataset):
|
|
82
83
|
if len(true_data) > 0 and true_branch.data is not None:
|
|
@@ -84,7 +85,7 @@ class If(Operator):
|
|
|
84
85
|
true_data,
|
|
85
86
|
true_branch.data,
|
|
86
87
|
on=ids,
|
|
87
|
-
how="
|
|
88
|
+
how="left",
|
|
88
89
|
suffixes=("_condition", ""),
|
|
89
90
|
)
|
|
90
91
|
else:
|
|
@@ -99,7 +100,7 @@ class If(Operator):
|
|
|
99
100
|
false_data,
|
|
100
101
|
false_branch.data,
|
|
101
102
|
on=ids,
|
|
102
|
-
how="
|
|
103
|
+
how="left",
|
|
103
104
|
suffixes=("_condition", ""),
|
|
104
105
|
)
|
|
105
106
|
else:
|
|
@@ -113,12 +114,12 @@ class If(Operator):
|
|
|
113
114
|
pd.concat([true_data, false_data], ignore_index=True)
|
|
114
115
|
.drop_duplicates()
|
|
115
116
|
.sort_values(by=ids)
|
|
116
|
-
)
|
|
117
|
+
).reset_index(drop=True)
|
|
117
118
|
if isinstance(result, Dataset):
|
|
118
119
|
drop_columns = [
|
|
119
120
|
column for column in result.data.columns if column not in result.components
|
|
120
121
|
]
|
|
121
|
-
result.data = result.data.
|
|
122
|
+
result.data = result.data.drop(columns=drop_columns)
|
|
122
123
|
if isinstance(true_branch, Scalar) and isinstance(false_branch, Scalar):
|
|
123
124
|
result.get_measures()[0].data_type = true_branch.data_type
|
|
124
125
|
result.get_measures()[0].name = COMP_NAME_MAPPING[true_branch.data_type]
|
|
@@ -135,6 +136,7 @@ class If(Operator):
|
|
|
135
136
|
nullable = False
|
|
136
137
|
left = true_branch
|
|
137
138
|
right = false_branch
|
|
139
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
138
140
|
if true_branch.__class__ != false_branch.__class__:
|
|
139
141
|
if (isinstance(true_branch, DataComponent) and isinstance(false_branch, Dataset)) or (
|
|
140
142
|
isinstance(true_branch, Dataset) and isinstance(false_branch, DataComponent)
|
|
@@ -147,6 +149,7 @@ class If(Operator):
|
|
|
147
149
|
right = true_branch
|
|
148
150
|
|
|
149
151
|
# Datacomponent
|
|
152
|
+
comp_name = VirtualCounter._new_dc_name()
|
|
150
153
|
if isinstance(condition, DataComponent):
|
|
151
154
|
if not condition.data_type == Boolean:
|
|
152
155
|
raise SemanticError(
|
|
@@ -154,17 +157,21 @@ class If(Operator):
|
|
|
154
157
|
op=cls.op,
|
|
155
158
|
type=SCALAR_TYPES_CLASS_REVERSE[condition.data_type],
|
|
156
159
|
)
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
nullable
|
|
164
|
-
if isinstance(right, DataComponent):
|
|
165
|
-
nullable
|
|
160
|
+
|
|
161
|
+
if (
|
|
162
|
+
isinstance(left, Scalar)
|
|
163
|
+
and isinstance(right, Scalar)
|
|
164
|
+
and (left.data_type == Null or right.data_type == Null)
|
|
165
|
+
):
|
|
166
|
+
nullable = True
|
|
167
|
+
if isinstance(left, DataComponent) and isinstance(right, DataComponent):
|
|
168
|
+
nullable = left.nullable or right.nullable
|
|
169
|
+
elif isinstance(left, DataComponent):
|
|
170
|
+
nullable = left.nullable or right.data_type == Null
|
|
171
|
+
elif isinstance(right, DataComponent):
|
|
172
|
+
nullable = left.data_type == Null or right.nullable
|
|
166
173
|
return DataComponent(
|
|
167
|
-
name=
|
|
174
|
+
name=comp_name,
|
|
168
175
|
data=None,
|
|
169
176
|
data_type=binary_implicit_promotion(left.data_type, right.data_type),
|
|
170
177
|
role=Role.MEASURE,
|
|
@@ -184,7 +191,7 @@ class If(Operator):
|
|
|
184
191
|
left.data_type = right.data_type = binary_implicit_promotion(
|
|
185
192
|
left.data_type, right.data_type
|
|
186
193
|
)
|
|
187
|
-
return Dataset(name=
|
|
194
|
+
return Dataset(name=dataset_name, components=copy(condition.components), data=None)
|
|
188
195
|
if left.get_identifiers() != condition.get_identifiers():
|
|
189
196
|
raise SemanticError("1-1-9-10", op=cls.op, clause=left.name)
|
|
190
197
|
if isinstance(right, Scalar):
|
|
@@ -218,7 +225,7 @@ class If(Operator):
|
|
|
218
225
|
if left.get_identifiers() != condition.get_identifiers():
|
|
219
226
|
raise SemanticError("1-1-9-6", op=cls.op)
|
|
220
227
|
result_components = {comp_name: copy(comp) for comp_name, comp in left.components.items()}
|
|
221
|
-
return Dataset(name=
|
|
228
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
222
229
|
|
|
223
230
|
|
|
224
231
|
class Nvl(Binary):
|
|
@@ -254,6 +261,8 @@ class Nvl(Binary):
|
|
|
254
261
|
|
|
255
262
|
@classmethod
|
|
256
263
|
def validate(cls, left: Any, right: Any) -> Union[Scalar, DataComponent, Dataset]:
|
|
264
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
265
|
+
comp_name = VirtualCounter._new_dc_name()
|
|
257
266
|
result_components = {}
|
|
258
267
|
if isinstance(left, Scalar):
|
|
259
268
|
if not isinstance(right, Scalar):
|
|
@@ -271,7 +280,7 @@ class Nvl(Binary):
|
|
|
271
280
|
)
|
|
272
281
|
cls.type_validation(left.data_type, right.data_type)
|
|
273
282
|
return DataComponent(
|
|
274
|
-
name=
|
|
283
|
+
name=comp_name,
|
|
275
284
|
data=pd.Series(dtype=object),
|
|
276
285
|
data_type=left.data_type,
|
|
277
286
|
role=Role.MEASURE,
|
|
@@ -298,7 +307,7 @@ class Nvl(Binary):
|
|
|
298
307
|
}
|
|
299
308
|
for comp in result_components.values():
|
|
300
309
|
comp.nullable = False
|
|
301
|
-
return Dataset(name=
|
|
310
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
302
311
|
|
|
303
312
|
|
|
304
313
|
class Case(Operator):
|
|
@@ -393,6 +402,8 @@ class Case(Operator):
|
|
|
393
402
|
def validate(
|
|
394
403
|
cls, conditions: List[Any], thenOps: List[Any], elseOp: Any
|
|
395
404
|
) -> Union[Scalar, DataComponent, Dataset]:
|
|
405
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
406
|
+
comp_name = VirtualCounter._new_dc_name()
|
|
396
407
|
if len(set(map(type, conditions))) > 1:
|
|
397
408
|
raise SemanticError("2-1-9-1", op=cls.op)
|
|
398
409
|
|
|
@@ -426,17 +437,15 @@ class Case(Operator):
|
|
|
426
437
|
raise SemanticError("2-1-9-4", op=cls.op, name=condition.name)
|
|
427
438
|
|
|
428
439
|
nullable = any(
|
|
429
|
-
(
|
|
430
|
-
for
|
|
440
|
+
(op.nullable if isinstance(op, DataComponent) else op.data_type == Null)
|
|
441
|
+
for op in ops
|
|
431
442
|
)
|
|
432
|
-
nullable |= any(condition.nullable for condition in conditions)
|
|
433
|
-
|
|
434
443
|
data_type = ops[0].data_type
|
|
435
444
|
for op in ops[1:]:
|
|
436
445
|
data_type = binary_implicit_promotion(data_type, op.data_type)
|
|
437
446
|
|
|
438
447
|
return DataComponent(
|
|
439
|
-
name=
|
|
448
|
+
name=comp_name,
|
|
440
449
|
data=None,
|
|
441
450
|
data_type=data_type,
|
|
442
451
|
role=Role.MEASURE,
|
|
@@ -459,4 +468,4 @@ class Case(Operator):
|
|
|
459
468
|
if isinstance(op, Dataset) and op.get_components_names() != comp_names:
|
|
460
469
|
raise SemanticError("2-1-9-7", op=cls.op)
|
|
461
470
|
|
|
462
|
-
return Dataset(name=
|
|
471
|
+
return Dataset(name=dataset_name, components=components, data=None)
|
vtlengine/Operators/General.py
CHANGED
|
@@ -7,6 +7,7 @@ from vtlengine.DataTypes import COMP_NAME_MAPPING
|
|
|
7
7
|
from vtlengine.Exceptions import SemanticError
|
|
8
8
|
from vtlengine.Model import Component, DataComponent, Dataset, ExternalRoutine, Role
|
|
9
9
|
from vtlengine.Operators import Binary, Unary
|
|
10
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
class Membership(Binary):
|
|
@@ -21,6 +22,7 @@ class Membership(Binary):
|
|
|
21
22
|
|
|
22
23
|
@classmethod
|
|
23
24
|
def validate(cls, left_operand: Any, right_operand: Any) -> Dataset:
|
|
25
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
24
26
|
if right_operand not in left_operand.components:
|
|
25
27
|
raise SemanticError(
|
|
26
28
|
"1-1-1-10",
|
|
@@ -46,7 +48,7 @@ class Membership(Binary):
|
|
|
46
48
|
for name, comp in left_operand.components.items()
|
|
47
49
|
if comp.role == Role.IDENTIFIER or comp.name == right_operand
|
|
48
50
|
}
|
|
49
|
-
result_dataset = Dataset(name=
|
|
51
|
+
result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
|
|
50
52
|
return result_dataset
|
|
51
53
|
|
|
52
54
|
@classmethod
|
|
@@ -9,6 +9,7 @@ import vtlengine.Operators as Operators
|
|
|
9
9
|
from vtlengine.AST.Grammar.tokens import HIERARCHY
|
|
10
10
|
from vtlengine.DataTypes import Boolean, Number
|
|
11
11
|
from vtlengine.Model import Component, DataComponent, Dataset, Role
|
|
12
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
def get_measure_from_dataset(dataset: Dataset, code_item: str) -> DataComponent:
|
|
@@ -221,10 +222,11 @@ class Hierarchy(Operators.Operator):
|
|
|
221
222
|
def validate(
|
|
222
223
|
cls, dataset: Dataset, computed_dict: Dict[str, DataFrame], output: str
|
|
223
224
|
) -> Dataset:
|
|
225
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
224
226
|
result_components = {
|
|
225
227
|
comp_name: copy(comp) for comp_name, comp in dataset.components.items()
|
|
226
228
|
}
|
|
227
|
-
return Dataset(name=
|
|
229
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
228
230
|
|
|
229
231
|
@classmethod
|
|
230
232
|
def evaluate(
|