vtlengine 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +62 -28
- vtlengine/API/__init__.py +25 -9
- vtlengine/AST/ASTConstructorModules/Expr.py +6 -3
- vtlengine/AST/DAG/__init__.py +34 -5
- vtlengine/AST/DAG/_words.py +1 -0
- vtlengine/AST/Grammar/Vtl.g4 +7 -7
- vtlengine/AST/Grammar/lexer.py +19759 -1112
- vtlengine/AST/Grammar/parser.py +17996 -3199
- vtlengine/Exceptions/messages.py +5 -2
- vtlengine/Interpreter/__init__.py +37 -3
- vtlengine/Operators/Aggregation.py +6 -1
- vtlengine/Operators/Analytic.py +3 -2
- vtlengine/Operators/CastOperator.py +5 -2
- vtlengine/Operators/Clause.py +26 -18
- vtlengine/Operators/Comparison.py +3 -1
- vtlengine/Operators/Conditional.py +20 -13
- vtlengine/Operators/General.py +3 -1
- vtlengine/Operators/HROperators.py +3 -1
- vtlengine/Operators/Join.py +4 -2
- vtlengine/Operators/Time.py +11 -5
- vtlengine/Operators/Validation.py +5 -2
- vtlengine/Operators/__init__.py +15 -8
- vtlengine/Utils/__Virtual_Assets.py +34 -0
- vtlengine/__init__.py +1 -1
- {vtlengine-1.1.1.dist-info → vtlengine-1.2.0.dist-info}/METADATA +4 -4
- {vtlengine-1.1.1.dist-info → vtlengine-1.2.0.dist-info}/RECORD +28 -27
- {vtlengine-1.1.1.dist-info → vtlengine-1.2.0.dist-info}/LICENSE.md +0 -0
- {vtlengine-1.1.1.dist-info → vtlengine-1.2.0.dist-info}/WHEEL +0 -0
vtlengine/Exceptions/messages.py
CHANGED
|
@@ -9,6 +9,8 @@ All exceptions exposed by the Vtl engine.
|
|
|
9
9
|
|
|
10
10
|
centralised_messages = {
|
|
11
11
|
# Input Validation errors
|
|
12
|
+
"0-1-1-1": "invalid script format type: {format_}. Input must be a string, "
|
|
13
|
+
"TransformationScheme or Path object",
|
|
12
14
|
"0-1-2-1": "Invalid json structure because additional properties have been supplied "
|
|
13
15
|
"on file {filename}.",
|
|
14
16
|
"0-1-2-2": "Errors found on file {filename}: {errors}",
|
|
@@ -22,6 +24,7 @@ centralised_messages = {
|
|
|
22
24
|
"0-1-3-4": "Dataset {short_urn} not found in mapping dictionary.",
|
|
23
25
|
"0-1-3-5": "Dataset {dataset_name} not found in the input datasets.",
|
|
24
26
|
"0-1-3-6": "Input name {missing} not found in the input datasets.",
|
|
27
|
+
"0-1-3-7": "Invalid input datasets type: {type_}. Expected a sequence of PandasDataset.",
|
|
25
28
|
# JSON Schema validations
|
|
26
29
|
"0-3-1-1": "Dataset {dataset} is not valid according to JSON schema",
|
|
27
30
|
# Infer Data Structure errors
|
|
@@ -241,8 +244,8 @@ centralised_messages = {
|
|
|
241
244
|
"2-3-1": "{comp_type} {comp_name} not found.",
|
|
242
245
|
"2-3-2": "{op_type} cannot be used with {node_op} operators.",
|
|
243
246
|
"2-3-4": "{op} operator must have a {comp}",
|
|
244
|
-
"2-3-5": "Expected {param_type}, got {type_name} on UDO {op}, parameter {param_name}",
|
|
245
|
-
"2-3-6": "Dataset {dataset_name} not found, please check input datastructures",
|
|
247
|
+
"2-3-5": "Expected {param_type}, got {type_name} on UDO {op}, parameter {param_name}.",
|
|
248
|
+
"2-3-6": "Dataset or scalar {dataset_name} not found, please check input datastructures.",
|
|
246
249
|
"2-3-9": "{comp_type} {comp_name} not found in {param}.",
|
|
247
250
|
"2-3-10": "No {comp_type} have been defined.",
|
|
248
251
|
"2-3-11": "{pos} operand must be a dataset.",
|
|
@@ -97,6 +97,7 @@ from vtlengine.Utils import (
|
|
|
97
97
|
THEN_ELSE,
|
|
98
98
|
UNARY_MAPPING,
|
|
99
99
|
)
|
|
100
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
100
101
|
|
|
101
102
|
|
|
102
103
|
# noinspection PyTypeChecker
|
|
@@ -104,6 +105,7 @@ from vtlengine.Utils import (
|
|
|
104
105
|
class InterpreterAnalyzer(ASTTemplate):
|
|
105
106
|
# Model elements
|
|
106
107
|
datasets: Dict[str, Dataset]
|
|
108
|
+
scalars: Optional[Dict[str, Scalar]] = None
|
|
107
109
|
value_domains: Optional[Dict[str, ValueDomain]] = None
|
|
108
110
|
external_routines: Optional[Dict[str, ExternalRoutine]] = None
|
|
109
111
|
# Analysis mode
|
|
@@ -202,6 +204,15 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
202
204
|
)
|
|
203
205
|
self.datasets[ds_name].data = None
|
|
204
206
|
|
|
207
|
+
def _save_scalars_efficient(self, scalars: Dict[str, Scalar]) -> None:
|
|
208
|
+
output_path = Path(self.output_path) # type: ignore[arg-type]
|
|
209
|
+
output_path.mkdir(parents=True, exist_ok=True)
|
|
210
|
+
|
|
211
|
+
for name, scalar in scalars.items():
|
|
212
|
+
file_path = output_path / f"{name}.csv"
|
|
213
|
+
df = pd.DataFrame([[scalar.value]] if scalar.value is not None else [[]])
|
|
214
|
+
df.to_csv(file_path, header=False, index=False)
|
|
215
|
+
|
|
205
216
|
# **********************************
|
|
206
217
|
# * *
|
|
207
218
|
# * AST Visitors *
|
|
@@ -215,6 +226,7 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
215
226
|
else:
|
|
216
227
|
Operators.only_semantic = False
|
|
217
228
|
results = {}
|
|
229
|
+
scalars_to_save = set()
|
|
218
230
|
for child in node.children:
|
|
219
231
|
if isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
|
|
220
232
|
vtlengine.Exceptions.dataset_output = child.left.value # type: ignore[attr-defined]
|
|
@@ -232,6 +244,9 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
232
244
|
self.else_condition_dataset = None
|
|
233
245
|
self.nested_condition = False
|
|
234
246
|
|
|
247
|
+
# Reset VirtualCounter
|
|
248
|
+
VirtualCounter.reset()
|
|
249
|
+
|
|
235
250
|
if result is None:
|
|
236
251
|
continue
|
|
237
252
|
|
|
@@ -240,9 +255,22 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
240
255
|
# Save results
|
|
241
256
|
self.datasets[result.name] = copy(result)
|
|
242
257
|
results[result.name] = result
|
|
258
|
+
if isinstance(result, Scalar):
|
|
259
|
+
scalars_to_save.add(result.name)
|
|
260
|
+
if self.scalars is None:
|
|
261
|
+
self.scalars = {}
|
|
262
|
+
self.scalars[result.name] = copy(result)
|
|
243
263
|
self._save_datapoints_efficient(statement_num)
|
|
244
264
|
statement_num += 1
|
|
245
265
|
|
|
266
|
+
if self.output_path is not None and scalars_to_save:
|
|
267
|
+
scalars_filtered = {
|
|
268
|
+
name: self.scalars[name] # type: ignore[index]
|
|
269
|
+
for name in scalars_to_save
|
|
270
|
+
if (not self.return_only_persistent or name in self.ds_analysis.get(PERSISTENT, [])) # type: ignore[union-attr]
|
|
271
|
+
}
|
|
272
|
+
self._save_scalars_efficient(scalars_filtered)
|
|
273
|
+
|
|
246
274
|
return results
|
|
247
275
|
|
|
248
276
|
# Definition Language
|
|
@@ -529,7 +557,10 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
529
557
|
# Setting here group by as we have already selected the identifiers we need
|
|
530
558
|
grouping_op = "group by"
|
|
531
559
|
|
|
532
|
-
|
|
560
|
+
result = AGGREGATION_MAPPING[node.op].analyze(operand, grouping_op, groupings, having)
|
|
561
|
+
if not self.is_from_regular_aggregation:
|
|
562
|
+
result.name = VirtualCounter._new_ds_name()
|
|
563
|
+
return result
|
|
533
564
|
|
|
534
565
|
def _format_having_expression_udo(self, having: str) -> str:
|
|
535
566
|
if self.udo_params is None:
|
|
@@ -765,10 +796,10 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
765
796
|
if self.is_from_join and node.value in self.datasets:
|
|
766
797
|
return self.datasets[node.value]
|
|
767
798
|
if self.regular_aggregation_dataset is not None:
|
|
768
|
-
if
|
|
799
|
+
if self.scalars is not None and node.value in self.scalars:
|
|
769
800
|
if node.value in self.regular_aggregation_dataset.components:
|
|
770
801
|
raise SemanticError("1-1-6-11", comp_name=node.value)
|
|
771
|
-
return self.
|
|
802
|
+
return self.scalars[node.value]
|
|
772
803
|
if self.regular_aggregation_dataset.data is not None:
|
|
773
804
|
if (
|
|
774
805
|
self.is_from_join
|
|
@@ -833,8 +864,11 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
833
864
|
role=self.ruleset_dataset.components[comp_name].role,
|
|
834
865
|
nullable=self.ruleset_dataset.components[comp_name].nullable,
|
|
835
866
|
)
|
|
867
|
+
if self.scalars and node.value in self.scalars:
|
|
868
|
+
return self.scalars[node.value]
|
|
836
869
|
if node.value not in self.datasets:
|
|
837
870
|
raise SemanticError("2-3-6", dataset_name=node.value)
|
|
871
|
+
|
|
838
872
|
return self.datasets[node.value]
|
|
839
873
|
|
|
840
874
|
def visit_Collection(self, node: AST.Collection) -> Any:
|
|
@@ -170,9 +170,14 @@ class Aggregation(Operator.Unary):
|
|
|
170
170
|
for measure_name in operand.get_measures_names():
|
|
171
171
|
result_components.pop(measure_name)
|
|
172
172
|
new_comp = Component(
|
|
173
|
-
name="int_var",
|
|
173
|
+
name="int_var",
|
|
174
|
+
role=Role.MEASURE,
|
|
175
|
+
data_type=Integer,
|
|
176
|
+
nullable=True,
|
|
174
177
|
)
|
|
175
178
|
result_components["int_var"] = new_comp
|
|
179
|
+
|
|
180
|
+
# VDS is handled in visit_Aggregation
|
|
176
181
|
return Dataset(name="result", components=result_components, data=None)
|
|
177
182
|
|
|
178
183
|
@classmethod
|
vtlengine/Operators/Analytic.py
CHANGED
|
@@ -37,6 +37,7 @@ from vtlengine.DataTypes import (
|
|
|
37
37
|
)
|
|
38
38
|
from vtlengine.Exceptions import SemanticError
|
|
39
39
|
from vtlengine.Model import Component, Dataset, Role
|
|
40
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
40
41
|
|
|
41
42
|
return_integer_operators = [MAX, MIN, SUM]
|
|
42
43
|
|
|
@@ -157,8 +158,8 @@ class Analytic(Operator.Unary):
|
|
|
157
158
|
role=Role.MEASURE,
|
|
158
159
|
nullable=nullable,
|
|
159
160
|
)
|
|
160
|
-
|
|
161
|
-
return Dataset(name=
|
|
161
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
162
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
162
163
|
|
|
163
164
|
@classmethod
|
|
164
165
|
def analyticfunc(
|
|
@@ -22,6 +22,7 @@ from vtlengine.DataTypes import (
|
|
|
22
22
|
from vtlengine.DataTypes.TimeHandling import str_period_to_date
|
|
23
23
|
from vtlengine.Exceptions import SemanticError
|
|
24
24
|
from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
|
|
25
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
25
26
|
|
|
26
27
|
duration_mapping = {"A": 6, "S": 5, "Q": 4, "M": 3, "W": 2, "D": 1}
|
|
27
28
|
|
|
@@ -331,7 +332,8 @@ class Cast(Operator.Unary):
|
|
|
331
332
|
role=Role.MEASURE,
|
|
332
333
|
nullable=measure.nullable,
|
|
333
334
|
)
|
|
334
|
-
|
|
335
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
336
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
335
337
|
|
|
336
338
|
@classmethod
|
|
337
339
|
def component_validation( # type: ignore[override]
|
|
@@ -346,7 +348,8 @@ class Cast(Operator.Unary):
|
|
|
346
348
|
|
|
347
349
|
from_type = operand.data_type
|
|
348
350
|
cls.check_cast(from_type, to_type, mask)
|
|
349
|
-
|
|
351
|
+
comp_name = VirtualCounter._new_dc_name()
|
|
352
|
+
return DataComponent(name=comp_name, data=None, data_type=to_type, role=operand.role)
|
|
350
353
|
|
|
351
354
|
@classmethod
|
|
352
355
|
def scalar_validation( # type: ignore[override]
|
vtlengine/Operators/Clause.py
CHANGED
|
@@ -15,6 +15,7 @@ from vtlengine.DataTypes import (
|
|
|
15
15
|
from vtlengine.Exceptions import SemanticError
|
|
16
16
|
from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
|
|
17
17
|
from vtlengine.Operators import Operator
|
|
18
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
class Calc(Operator):
|
|
@@ -23,7 +24,8 @@ class Calc(Operator):
|
|
|
23
24
|
@classmethod
|
|
24
25
|
def validate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
|
|
25
26
|
result_components = {name: copy(comp) for name, comp in dataset.components.items()}
|
|
26
|
-
|
|
27
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
28
|
+
result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
|
|
27
29
|
|
|
28
30
|
for operand in operands:
|
|
29
31
|
if operand.name in result_dataset.components:
|
|
@@ -70,7 +72,8 @@ class Aggregate(Operator):
|
|
|
70
72
|
|
|
71
73
|
@classmethod
|
|
72
74
|
def validate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
|
|
73
|
-
|
|
75
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
76
|
+
result_dataset = Dataset(name=dataset_name, components=dataset.components, data=None)
|
|
74
77
|
|
|
75
78
|
for operand in operands:
|
|
76
79
|
if operand.name in dataset.get_identifiers_names() or (
|
|
@@ -122,7 +125,8 @@ class Filter(Operator):
|
|
|
122
125
|
def validate(cls, condition: DataComponent, dataset: Dataset) -> Dataset:
|
|
123
126
|
if condition.data_type != Boolean:
|
|
124
127
|
raise ValueError(f"Filter condition must be of type {Boolean}")
|
|
125
|
-
|
|
128
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
129
|
+
return Dataset(name=dataset_name, components=dataset.components, data=None)
|
|
126
130
|
|
|
127
131
|
@classmethod
|
|
128
132
|
def evaluate(cls, condition: DataComponent, dataset: Dataset) -> Dataset:
|
|
@@ -139,19 +143,20 @@ class Keep(Operator):
|
|
|
139
143
|
|
|
140
144
|
@classmethod
|
|
141
145
|
def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
|
|
146
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
142
147
|
for operand in operands:
|
|
143
148
|
if operand not in dataset.get_components_names():
|
|
144
149
|
raise SemanticError(
|
|
145
|
-
"1-1-1-10", op=cls.op, comp_name=operand, dataset_name=
|
|
150
|
+
"1-1-1-10", op=cls.op, comp_name=operand, dataset_name=dataset_name
|
|
146
151
|
)
|
|
147
152
|
if dataset.get_component(operand).role == Role.IDENTIFIER:
|
|
148
|
-
raise SemanticError("1-1-6-2", op=cls.op, name=operand, dataset=
|
|
153
|
+
raise SemanticError("1-1-6-2", op=cls.op, name=operand, dataset=dataset_name)
|
|
149
154
|
result_components = {
|
|
150
155
|
name: comp
|
|
151
156
|
for name, comp in dataset.components.items()
|
|
152
157
|
if comp.name in operands or comp.role == Role.IDENTIFIER
|
|
153
158
|
}
|
|
154
|
-
return Dataset(name=
|
|
159
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
155
160
|
|
|
156
161
|
@classmethod
|
|
157
162
|
def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
|
|
@@ -170,17 +175,18 @@ class Drop(Operator):
|
|
|
170
175
|
|
|
171
176
|
@classmethod
|
|
172
177
|
def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
|
|
178
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
173
179
|
for operand in operands:
|
|
174
180
|
if operand not in dataset.components:
|
|
175
|
-
raise SemanticError("1-1-1-10", comp_name=operand, dataset_name=
|
|
181
|
+
raise SemanticError("1-1-1-10", comp_name=operand, dataset_name=dataset_name)
|
|
176
182
|
if dataset.get_component(operand).role == Role.IDENTIFIER:
|
|
177
|
-
raise SemanticError("1-1-6-2", op=cls.op, name=operand, dataset=
|
|
183
|
+
raise SemanticError("1-1-6-2", op=cls.op, name=operand, dataset=dataset_name)
|
|
178
184
|
if len(dataset.components) == len(operands):
|
|
179
185
|
raise SemanticError("1-1-6-12", op=cls.op)
|
|
180
186
|
result_components = {
|
|
181
187
|
name: comp for name, comp in dataset.components.items() if comp.name not in operands
|
|
182
188
|
}
|
|
183
|
-
return Dataset(name=
|
|
189
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
184
190
|
|
|
185
191
|
@classmethod
|
|
186
192
|
def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
|
|
@@ -195,6 +201,7 @@ class Rename(Operator):
|
|
|
195
201
|
|
|
196
202
|
@classmethod
|
|
197
203
|
def validate(cls, operands: List[RenameNode], dataset: Dataset) -> Dataset:
|
|
204
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
198
205
|
from_names = [operand.old_name for operand in operands]
|
|
199
206
|
if len(from_names) != len(set(from_names)):
|
|
200
207
|
duplicates = set([name for name in from_names if from_names.count(name) > 1])
|
|
@@ -211,14 +218,14 @@ class Rename(Operator):
|
|
|
211
218
|
"1-1-1-10",
|
|
212
219
|
op=cls.op,
|
|
213
220
|
comp_name=operand.old_name,
|
|
214
|
-
dataset_name=
|
|
221
|
+
dataset_name=dataset_name,
|
|
215
222
|
)
|
|
216
223
|
if operand.new_name in dataset.components:
|
|
217
224
|
raise SemanticError(
|
|
218
225
|
"1-1-6-8",
|
|
219
226
|
op=cls.op,
|
|
220
227
|
comp_name=operand.new_name,
|
|
221
|
-
dataset_name=
|
|
228
|
+
dataset_name=dataset_name,
|
|
222
229
|
)
|
|
223
230
|
|
|
224
231
|
result_components = {comp.name: comp for comp in dataset.components.values()}
|
|
@@ -230,8 +237,7 @@ class Rename(Operator):
|
|
|
230
237
|
nullable=result_components[operand.old_name].nullable,
|
|
231
238
|
)
|
|
232
239
|
del result_components[operand.old_name]
|
|
233
|
-
|
|
234
|
-
return Dataset(name=dataset.name, components=result_components, data=None)
|
|
240
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
235
241
|
|
|
236
242
|
@classmethod
|
|
237
243
|
def evaluate(cls, operands: List[RenameNode], dataset: Dataset) -> Dataset:
|
|
@@ -256,6 +262,7 @@ class Pivot(Operator):
|
|
|
256
262
|
class Unpivot(Operator):
|
|
257
263
|
@classmethod
|
|
258
264
|
def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
|
|
265
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
259
266
|
if len(operands) != 2:
|
|
260
267
|
raise ValueError("Unpivot clause requires two operands")
|
|
261
268
|
identifier, measure = operands
|
|
@@ -263,10 +270,10 @@ class Unpivot(Operator):
|
|
|
263
270
|
if len(dataset.get_identifiers()) < 1:
|
|
264
271
|
raise SemanticError("1-3-27", op=cls.op)
|
|
265
272
|
if identifier in dataset.components:
|
|
266
|
-
raise SemanticError("1-1-6-2", op=cls.op, name=identifier, dataset=
|
|
273
|
+
raise SemanticError("1-1-6-2", op=cls.op, name=identifier, dataset=dataset_name)
|
|
267
274
|
|
|
268
275
|
result_components = {comp.name: comp for comp in dataset.get_identifiers()}
|
|
269
|
-
result_dataset = Dataset(name=
|
|
276
|
+
result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
|
|
270
277
|
# noinspection PyTypeChecker
|
|
271
278
|
result_dataset.add_component(
|
|
272
279
|
Component(name=identifier, data_type=String, role=Role.IDENTIFIER, nullable=False)
|
|
@@ -306,6 +313,7 @@ class Sub(Operator):
|
|
|
306
313
|
|
|
307
314
|
@classmethod
|
|
308
315
|
def validate(cls, operands: List[DataComponent], dataset: Dataset) -> Dataset:
|
|
316
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
309
317
|
if len(dataset.get_identifiers()) < 1:
|
|
310
318
|
raise SemanticError("1-3-27", op=cls.op)
|
|
311
319
|
for operand in operands:
|
|
@@ -314,14 +322,14 @@ class Sub(Operator):
|
|
|
314
322
|
"1-1-1-10",
|
|
315
323
|
op=cls.op,
|
|
316
324
|
comp_name=operand.name,
|
|
317
|
-
dataset_name=
|
|
325
|
+
dataset_name=dataset_name,
|
|
318
326
|
)
|
|
319
327
|
if operand.role != Role.IDENTIFIER:
|
|
320
328
|
raise SemanticError(
|
|
321
329
|
"1-1-6-10",
|
|
322
330
|
op=cls.op,
|
|
323
331
|
operand=operand.name,
|
|
324
|
-
dataset_name=
|
|
332
|
+
dataset_name=dataset_name,
|
|
325
333
|
)
|
|
326
334
|
if isinstance(operand, Scalar):
|
|
327
335
|
raise SemanticError("1-1-6-5", op=cls.op, name=operand.name)
|
|
@@ -331,7 +339,7 @@ class Sub(Operator):
|
|
|
331
339
|
for name, comp in dataset.components.items()
|
|
332
340
|
if comp.name not in [operand.name for operand in operands]
|
|
333
341
|
}
|
|
334
|
-
return Dataset(name=
|
|
342
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
335
343
|
|
|
336
344
|
@classmethod
|
|
337
345
|
def evaluate(cls, operands: List[DataComponent], dataset: Dataset) -> Dataset:
|
|
@@ -25,6 +25,7 @@ from vtlengine.AST.Grammar.tokens import (
|
|
|
25
25
|
from vtlengine.DataTypes import COMP_NAME_MAPPING, Boolean, Null, Number, String
|
|
26
26
|
from vtlengine.Exceptions import SemanticError
|
|
27
27
|
from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
|
|
28
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
class Unary(Operator.Unary):
|
|
@@ -403,6 +404,7 @@ class ExistIn(Operator.Operator):
|
|
|
403
404
|
def validate(
|
|
404
405
|
cls, dataset_1: Dataset, dataset_2: Dataset, retain_element: Optional[Boolean]
|
|
405
406
|
) -> Any:
|
|
407
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
406
408
|
left_identifiers = dataset_1.get_identifiers_names()
|
|
407
409
|
right_identifiers = dataset_2.get_identifiers_names()
|
|
408
410
|
|
|
@@ -412,7 +414,7 @@ class ExistIn(Operator.Operator):
|
|
|
412
414
|
raise ValueError("Datasets must have common identifiers")
|
|
413
415
|
|
|
414
416
|
result_components = {comp.name: copy(comp) for comp in dataset_1.get_identifiers()}
|
|
415
|
-
result_dataset = Dataset(name=
|
|
417
|
+
result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
|
|
416
418
|
result_dataset.add_component(
|
|
417
419
|
Component(name="bool_var", data_type=Boolean, role=Role.MEASURE, nullable=False)
|
|
418
420
|
)
|
|
@@ -19,6 +19,7 @@ from vtlengine.DataTypes import (
|
|
|
19
19
|
from vtlengine.Exceptions import SemanticError
|
|
20
20
|
from vtlengine.Model import DataComponent, Dataset, Role, Scalar
|
|
21
21
|
from vtlengine.Operators import Binary, Operator
|
|
22
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
22
23
|
|
|
23
24
|
|
|
24
25
|
class If(Operator):
|
|
@@ -75,8 +76,8 @@ class If(Operator):
|
|
|
75
76
|
) -> Dataset:
|
|
76
77
|
ids = condition.get_identifiers_names()
|
|
77
78
|
condition_measure = condition.get_measures_names()[0]
|
|
78
|
-
true_data = condition.data[condition.data[condition_measure] == True]
|
|
79
|
-
false_data = condition.data[condition.data[condition_measure] != True]
|
|
79
|
+
true_data = condition.data[condition.data[condition_measure].dropna() == True]
|
|
80
|
+
false_data = condition.data[condition.data[condition_measure] != True]
|
|
80
81
|
|
|
81
82
|
if isinstance(true_branch, Dataset):
|
|
82
83
|
if len(true_data) > 0 and true_branch.data is not None:
|
|
@@ -84,7 +85,7 @@ class If(Operator):
|
|
|
84
85
|
true_data,
|
|
85
86
|
true_branch.data,
|
|
86
87
|
on=ids,
|
|
87
|
-
how="
|
|
88
|
+
how="left",
|
|
88
89
|
suffixes=("_condition", ""),
|
|
89
90
|
)
|
|
90
91
|
else:
|
|
@@ -99,7 +100,7 @@ class If(Operator):
|
|
|
99
100
|
false_data,
|
|
100
101
|
false_branch.data,
|
|
101
102
|
on=ids,
|
|
102
|
-
how="
|
|
103
|
+
how="left",
|
|
103
104
|
suffixes=("_condition", ""),
|
|
104
105
|
)
|
|
105
106
|
else:
|
|
@@ -113,12 +114,12 @@ class If(Operator):
|
|
|
113
114
|
pd.concat([true_data, false_data], ignore_index=True)
|
|
114
115
|
.drop_duplicates()
|
|
115
116
|
.sort_values(by=ids)
|
|
116
|
-
)
|
|
117
|
+
).reset_index(drop=True)
|
|
117
118
|
if isinstance(result, Dataset):
|
|
118
119
|
drop_columns = [
|
|
119
120
|
column for column in result.data.columns if column not in result.components
|
|
120
121
|
]
|
|
121
|
-
result.data = result.data.
|
|
122
|
+
result.data = result.data.drop(columns=drop_columns)
|
|
122
123
|
if isinstance(true_branch, Scalar) and isinstance(false_branch, Scalar):
|
|
123
124
|
result.get_measures()[0].data_type = true_branch.data_type
|
|
124
125
|
result.get_measures()[0].name = COMP_NAME_MAPPING[true_branch.data_type]
|
|
@@ -135,6 +136,7 @@ class If(Operator):
|
|
|
135
136
|
nullable = False
|
|
136
137
|
left = true_branch
|
|
137
138
|
right = false_branch
|
|
139
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
138
140
|
if true_branch.__class__ != false_branch.__class__:
|
|
139
141
|
if (isinstance(true_branch, DataComponent) and isinstance(false_branch, Dataset)) or (
|
|
140
142
|
isinstance(true_branch, Dataset) and isinstance(false_branch, DataComponent)
|
|
@@ -147,6 +149,7 @@ class If(Operator):
|
|
|
147
149
|
right = true_branch
|
|
148
150
|
|
|
149
151
|
# Datacomponent
|
|
152
|
+
comp_name = VirtualCounter._new_dc_name()
|
|
150
153
|
if isinstance(condition, DataComponent):
|
|
151
154
|
if not condition.data_type == Boolean:
|
|
152
155
|
raise SemanticError(
|
|
@@ -164,7 +167,7 @@ class If(Operator):
|
|
|
164
167
|
if isinstance(right, DataComponent):
|
|
165
168
|
nullable |= right.nullable
|
|
166
169
|
return DataComponent(
|
|
167
|
-
name=
|
|
170
|
+
name=comp_name,
|
|
168
171
|
data=None,
|
|
169
172
|
data_type=binary_implicit_promotion(left.data_type, right.data_type),
|
|
170
173
|
role=Role.MEASURE,
|
|
@@ -184,7 +187,7 @@ class If(Operator):
|
|
|
184
187
|
left.data_type = right.data_type = binary_implicit_promotion(
|
|
185
188
|
left.data_type, right.data_type
|
|
186
189
|
)
|
|
187
|
-
return Dataset(name=
|
|
190
|
+
return Dataset(name=dataset_name, components=copy(condition.components), data=None)
|
|
188
191
|
if left.get_identifiers() != condition.get_identifiers():
|
|
189
192
|
raise SemanticError("1-1-9-10", op=cls.op, clause=left.name)
|
|
190
193
|
if isinstance(right, Scalar):
|
|
@@ -218,7 +221,7 @@ class If(Operator):
|
|
|
218
221
|
if left.get_identifiers() != condition.get_identifiers():
|
|
219
222
|
raise SemanticError("1-1-9-6", op=cls.op)
|
|
220
223
|
result_components = {comp_name: copy(comp) for comp_name, comp in left.components.items()}
|
|
221
|
-
return Dataset(name=
|
|
224
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
222
225
|
|
|
223
226
|
|
|
224
227
|
class Nvl(Binary):
|
|
@@ -254,6 +257,8 @@ class Nvl(Binary):
|
|
|
254
257
|
|
|
255
258
|
@classmethod
|
|
256
259
|
def validate(cls, left: Any, right: Any) -> Union[Scalar, DataComponent, Dataset]:
|
|
260
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
261
|
+
comp_name = VirtualCounter._new_dc_name()
|
|
257
262
|
result_components = {}
|
|
258
263
|
if isinstance(left, Scalar):
|
|
259
264
|
if not isinstance(right, Scalar):
|
|
@@ -271,7 +276,7 @@ class Nvl(Binary):
|
|
|
271
276
|
)
|
|
272
277
|
cls.type_validation(left.data_type, right.data_type)
|
|
273
278
|
return DataComponent(
|
|
274
|
-
name=
|
|
279
|
+
name=comp_name,
|
|
275
280
|
data=pd.Series(dtype=object),
|
|
276
281
|
data_type=left.data_type,
|
|
277
282
|
role=Role.MEASURE,
|
|
@@ -298,7 +303,7 @@ class Nvl(Binary):
|
|
|
298
303
|
}
|
|
299
304
|
for comp in result_components.values():
|
|
300
305
|
comp.nullable = False
|
|
301
|
-
return Dataset(name=
|
|
306
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
302
307
|
|
|
303
308
|
|
|
304
309
|
class Case(Operator):
|
|
@@ -393,6 +398,8 @@ class Case(Operator):
|
|
|
393
398
|
def validate(
|
|
394
399
|
cls, conditions: List[Any], thenOps: List[Any], elseOp: Any
|
|
395
400
|
) -> Union[Scalar, DataComponent, Dataset]:
|
|
401
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
402
|
+
comp_name = VirtualCounter._new_dc_name()
|
|
396
403
|
if len(set(map(type, conditions))) > 1:
|
|
397
404
|
raise SemanticError("2-1-9-1", op=cls.op)
|
|
398
405
|
|
|
@@ -436,7 +443,7 @@ class Case(Operator):
|
|
|
436
443
|
data_type = binary_implicit_promotion(data_type, op.data_type)
|
|
437
444
|
|
|
438
445
|
return DataComponent(
|
|
439
|
-
name=
|
|
446
|
+
name=comp_name,
|
|
440
447
|
data=None,
|
|
441
448
|
data_type=data_type,
|
|
442
449
|
role=Role.MEASURE,
|
|
@@ -459,4 +466,4 @@ class Case(Operator):
|
|
|
459
466
|
if isinstance(op, Dataset) and op.get_components_names() != comp_names:
|
|
460
467
|
raise SemanticError("2-1-9-7", op=cls.op)
|
|
461
468
|
|
|
462
|
-
return Dataset(name=
|
|
469
|
+
return Dataset(name=dataset_name, components=components, data=None)
|
vtlengine/Operators/General.py
CHANGED
|
@@ -7,6 +7,7 @@ from vtlengine.DataTypes import COMP_NAME_MAPPING
|
|
|
7
7
|
from vtlengine.Exceptions import SemanticError
|
|
8
8
|
from vtlengine.Model import Component, DataComponent, Dataset, ExternalRoutine, Role
|
|
9
9
|
from vtlengine.Operators import Binary, Unary
|
|
10
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
class Membership(Binary):
|
|
@@ -21,6 +22,7 @@ class Membership(Binary):
|
|
|
21
22
|
|
|
22
23
|
@classmethod
|
|
23
24
|
def validate(cls, left_operand: Any, right_operand: Any) -> Dataset:
|
|
25
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
24
26
|
if right_operand not in left_operand.components:
|
|
25
27
|
raise SemanticError(
|
|
26
28
|
"1-1-1-10",
|
|
@@ -46,7 +48,7 @@ class Membership(Binary):
|
|
|
46
48
|
for name, comp in left_operand.components.items()
|
|
47
49
|
if comp.role == Role.IDENTIFIER or comp.name == right_operand
|
|
48
50
|
}
|
|
49
|
-
result_dataset = Dataset(name=
|
|
51
|
+
result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
|
|
50
52
|
return result_dataset
|
|
51
53
|
|
|
52
54
|
@classmethod
|
|
@@ -9,6 +9,7 @@ import vtlengine.Operators as Operators
|
|
|
9
9
|
from vtlengine.AST.Grammar.tokens import HIERARCHY
|
|
10
10
|
from vtlengine.DataTypes import Boolean, Number
|
|
11
11
|
from vtlengine.Model import Component, DataComponent, Dataset, Role
|
|
12
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
def get_measure_from_dataset(dataset: Dataset, code_item: str) -> DataComponent:
|
|
@@ -221,10 +222,11 @@ class Hierarchy(Operators.Operator):
|
|
|
221
222
|
def validate(
|
|
222
223
|
cls, dataset: Dataset, computed_dict: Dict[str, DataFrame], output: str
|
|
223
224
|
) -> Dataset:
|
|
225
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
224
226
|
result_components = {
|
|
225
227
|
comp_name: copy(comp) for comp_name, comp in dataset.components.items()
|
|
226
228
|
}
|
|
227
|
-
return Dataset(name=
|
|
229
|
+
return Dataset(name=dataset_name, components=result_components, data=None)
|
|
228
230
|
|
|
229
231
|
@classmethod
|
|
230
232
|
def evaluate(
|
vtlengine/Operators/Join.py
CHANGED
|
@@ -13,6 +13,7 @@ from vtlengine.DataTypes import binary_implicit_promotion
|
|
|
13
13
|
from vtlengine.Exceptions import SemanticError
|
|
14
14
|
from vtlengine.Model import Component, Dataset, Role
|
|
15
15
|
from vtlengine.Operators import Operator, _id_type_promotion_join_keys
|
|
16
|
+
from vtlengine.Utils.__Virtual_Assets import VirtualCounter
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
class Join(Operator):
|
|
@@ -196,12 +197,13 @@ class Join(Operator):
|
|
|
196
197
|
|
|
197
198
|
@classmethod
|
|
198
199
|
def validate(cls, operands: List[Dataset], using: Optional[List[str]]) -> Dataset:
|
|
200
|
+
dataset_name = VirtualCounter._new_ds_name()
|
|
199
201
|
if len(operands) < 1 or sum([isinstance(op, Dataset) for op in operands]) < 1:
|
|
200
202
|
raise Exception("Join operator requires at least 1 dataset")
|
|
201
203
|
if not all(isinstance(op, Dataset) for op in operands):
|
|
202
204
|
raise SemanticError("1-1-13-10")
|
|
203
205
|
if len(operands) == 1 and isinstance(operands[0], Dataset):
|
|
204
|
-
return Dataset(name=
|
|
206
|
+
return Dataset(name=dataset_name, components=operands[0].components, data=None)
|
|
205
207
|
for op in operands:
|
|
206
208
|
if len(op.get_identifiers()) == 0:
|
|
207
209
|
raise SemanticError("1-3-27", op=cls.op)
|
|
@@ -215,7 +217,7 @@ class Join(Operator):
|
|
|
215
217
|
if len(set(components.keys())) != len(components):
|
|
216
218
|
raise SemanticError("1-1-13-9", comp_name="")
|
|
217
219
|
|
|
218
|
-
return Dataset(name=
|
|
220
|
+
return Dataset(name=dataset_name, components=components, data=None)
|
|
219
221
|
|
|
220
222
|
@classmethod
|
|
221
223
|
def identifiers_validation(cls, operands: List[Dataset], using: Optional[List[str]]) -> None:
|