vtlengine 1.1rc2-py3-none-any.whl → 1.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of vtlengine has been flagged by the registry.
- vtlengine/API/_InternalApi.py +288 -29
- vtlengine/API/__init__.py +277 -70
- vtlengine/AST/ASTComment.py +56 -0
- vtlengine/AST/ASTConstructor.py +71 -18
- vtlengine/AST/ASTConstructorModules/Expr.py +197 -75
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +81 -38
- vtlengine/AST/ASTConstructorModules/Terminals.py +76 -31
- vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- vtlengine/AST/ASTEncoders.py +4 -0
- vtlengine/AST/ASTString.py +622 -0
- vtlengine/AST/ASTTemplate.py +28 -2
- vtlengine/AST/DAG/__init__.py +44 -6
- vtlengine/AST/DAG/_words.py +1 -0
- vtlengine/AST/Grammar/Vtl.g4 +7 -7
- vtlengine/AST/Grammar/lexer.py +19759 -1112
- vtlengine/AST/Grammar/parser.py +17996 -3199
- vtlengine/AST/__init__.py +127 -14
- vtlengine/Exceptions/messages.py +14 -2
- vtlengine/Interpreter/__init__.py +90 -11
- vtlengine/Model/__init__.py +9 -4
- vtlengine/Operators/Aggregation.py +13 -6
- vtlengine/Operators/Analytic.py +19 -13
- vtlengine/Operators/CastOperator.py +5 -2
- vtlengine/Operators/Clause.py +26 -18
- vtlengine/Operators/Comparison.py +3 -1
- vtlengine/Operators/Conditional.py +40 -18
- vtlengine/Operators/General.py +3 -1
- vtlengine/Operators/HROperators.py +3 -1
- vtlengine/Operators/Join.py +4 -2
- vtlengine/Operators/Time.py +22 -15
- vtlengine/Operators/Validation.py +5 -2
- vtlengine/Operators/__init__.py +15 -8
- vtlengine/Utils/__Virtual_Assets.py +34 -0
- vtlengine/Utils/__init__.py +49 -0
- vtlengine/__init__.py +4 -2
- vtlengine/files/parser/__init__.py +16 -26
- vtlengine/files/parser/_rfc_dialect.py +1 -1
- vtlengine/py.typed +0 -0
- vtlengine-1.2.0.dist-info/METADATA +92 -0
- vtlengine-1.2.0.dist-info/RECORD +63 -0
- {vtlengine-1.1rc2.dist-info → vtlengine-1.2.0.dist-info}/WHEEL +1 -1
- vtlengine-1.1rc2.dist-info/METADATA +0 -248
- vtlengine-1.1rc2.dist-info/RECORD +0 -59
- {vtlengine-1.1rc2.dist-info → vtlengine-1.2.0.dist-info}/LICENSE.md +0 -0
vtlengine/Interpreter/__init__.py
CHANGED

@@ -8,9 +8,10 @@ import pandas as pd
 import vtlengine.AST as AST
 import vtlengine.Exceptions
 import vtlengine.Operators as Operators
+from vtlengine.AST import VarID
 from vtlengine.AST.ASTTemplate import ASTTemplate
 from vtlengine.AST.DAG import HRDAGAnalyzer
-from vtlengine.AST.DAG._words import DELETE, GLOBAL, INSERT
+from vtlengine.AST.DAG._words import DELETE, GLOBAL, INSERT, PERSISTENT
 from vtlengine.AST.Grammar.tokens import (
     AGGREGATE,
     ALL,
@@ -96,6 +97,7 @@ from vtlengine.Utils import (
     THEN_ELSE,
     UNARY_MAPPING,
 )
+from vtlengine.Utils.__Virtual_Assets import VirtualCounter


 # noinspection PyTypeChecker
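Note: vtlengine/Utils/__Virtual_Assets.py is new in 1.2.0 (+34 lines) but its body is not shown in this diff. From the call sites below (VirtualCounter.reset(), VirtualCounter._new_ds_name(), VirtualCounter._new_dc_name()), it behaves like a class-level counter that hands out fresh names for intermediate ("virtual") datasets and components. A minimal sketch under that assumption; the counter fields and the @VDS_/@VDC_ name format are guesses, not the library's actual output:

# Hypothetical sketch of VirtualCounter; only reset(), _new_ds_name() and
# _new_dc_name() are visible in this diff. Counters and name format are assumed.
class VirtualCounter:
    ds_count: int = 0
    dc_count: int = 0

    @classmethod
    def _new_ds_name(cls) -> str:
        # Hand out a fresh name for an intermediate (virtual) dataset
        cls.ds_count += 1
        return f"@VDS_{cls.ds_count}"

    @classmethod
    def _new_dc_name(cls) -> str:
        # Hand out a fresh name for an intermediate (virtual) data component
        cls.dc_count += 1
        return f"@VDC_{cls.dc_count}"

    @classmethod
    def reset(cls) -> None:
        # Called by the interpreter after each statement (see visit_Start below)
        cls.ds_count = 0
        cls.dc_count = 0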
@@ -103,6 +105,7 @@ from vtlengine.Utils import (
 class InterpreterAnalyzer(ASTTemplate):
     # Model elements
     datasets: Dict[str, Dataset]
+    scalars: Optional[Dict[str, Scalar]] = None
     value_domains: Optional[Dict[str, ValueDomain]] = None
     external_routines: Optional[Dict[str, ExternalRoutine]] = None
     # Analysis mode
@@ -113,6 +116,8 @@ class InterpreterAnalyzer(ASTTemplate):
     output_path: Optional[Union[str, Path]] = None
     # Time Period Representation
     time_period_representation: Optional[TimePeriodRepresentation] = None
+    # Return only persistent
+    return_only_persistent: bool = True
     # Flags to change behavior
     nested_condition: Union[str, bool] = False
     is_from_assignment: bool = False
@@ -188,7 +193,9 @@ class InterpreterAnalyzer(ASTTemplate):
                 # We do not save global input datasets, only results of transformations
                 self.datasets[ds_name].data = None
                 continue
-
+            if self.return_only_persistent and ds_name not in self.ds_analysis[PERSISTENT]:
+                self.datasets[ds_name].data = None
+                continue
             # Saving only datasets, no scalars
             save_datapoints(
                 self.time_period_representation,
@@ -197,6 +204,15 @@ class InterpreterAnalyzer(ASTTemplate):
             )
             self.datasets[ds_name].data = None

+    def _save_scalars_efficient(self, scalars: Dict[str, Scalar]) -> None:
+        output_path = Path(self.output_path)  # type: ignore[arg-type]
+        output_path.mkdir(parents=True, exist_ok=True)
+
+        for name, scalar in scalars.items():
+            file_path = output_path / f"{name}.csv"
+            df = pd.DataFrame([[scalar.value]] if scalar.value is not None else [[]])
+            df.to_csv(file_path, header=False, index=False)
+
     # **********************************
     # *                                *
     # *          AST Visitors          *
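Note: the new helper writes each scalar to <output_path>/<name>.csv as a single headerless, unindexed cell (an empty file when the value is None). A quick round-trip sketch; the scalar name total_population and value 42 are made up for illustration:

import pandas as pd

# Write a scalar the way _save_scalars_efficient does: one cell, no header, no index.
pd.DataFrame([[42]]).to_csv("total_population.csv", header=False, index=False)

# Reading it back yields the bare value.
value = pd.read_csv("total_population.csv", header=None).iloc[0, 0]
print(value)  # 42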
@@ -210,6 +226,7 @@ class InterpreterAnalyzer(ASTTemplate):
         else:
             Operators.only_semantic = False
         results = {}
+        scalars_to_save = set()
         for child in node.children:
             if isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
                 vtlengine.Exceptions.dataset_output = child.left.value  # type: ignore[attr-defined]
@@ -227,6 +244,9 @@ class InterpreterAnalyzer(ASTTemplate):
                 self.else_condition_dataset = None
                 self.nested_condition = False

+            # Reset VirtualCounter
+            VirtualCounter.reset()
+
             if result is None:
                 continue

@@ -235,9 +255,22 @@ class InterpreterAnalyzer(ASTTemplate):
             # Save results
             self.datasets[result.name] = copy(result)
             results[result.name] = result
+            if isinstance(result, Scalar):
+                scalars_to_save.add(result.name)
+                if self.scalars is None:
+                    self.scalars = {}
+                self.scalars[result.name] = copy(result)
             self._save_datapoints_efficient(statement_num)
             statement_num += 1

+        if self.output_path is not None and scalars_to_save:
+            scalars_filtered = {
+                name: self.scalars[name]  # type: ignore[index]
+                for name in scalars_to_save
+                if (not self.return_only_persistent or name in self.ds_analysis.get(PERSISTENT, []))  # type: ignore[union-attr]
+            }
+            self._save_scalars_efficient(scalars_filtered)
+
         return results

     # Definition Language
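Note: the comprehension above keeps a scalar only when return_only_persistent is off or the name appears under the PERSISTENT key of ds_analysis. A toy rerun of that filter; the dict shapes and names are assumptions based only on the usage above:

# Assumed shape: ds_analysis maps analysis keys to lists of result names.
PERSISTENT = "persistent"
ds_analysis = {PERSISTENT: ["total_pop"]}
scalars = {"total_pop": 42, "tmp_ratio": 0.5}
return_only_persistent = True

filtered = {
    name: scalars[name]
    for name in scalars
    if (not return_only_persistent or name in ds_analysis.get(PERSISTENT, []))
}
print(filtered)  # {'total_pop': 42} -- tmp_ratio is dropped from the output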
@@ -335,7 +368,7 @@ class InterpreterAnalyzer(ASTTemplate):
         for i, rule in enumerate(node.rules):
             rule.name = (i + 1).__str__()

-        cond_comp = []
+        cond_comp: List[Any] = []
         if isinstance(node.element, list):
             cond_comp = [x.value for x in node.element[:-1]]
             node.element = node.element[-1]
@@ -369,6 +402,9 @@ class InterpreterAnalyzer(ASTTemplate):
     def visit_PersistentAssignment(self, node: AST.PersistentAssignment) -> Any:
         return self.visit_Assignment(node)

+    def visit_ParFunction(self, node: AST.ParFunction) -> Any:
+        return self.visit(node.operand)
+
     def visit_BinOp(self, node: AST.BinOp) -> Any:
         is_from_if = False
         if (
@@ -390,7 +426,13 @@ class InterpreterAnalyzer(ASTTemplate):
                 comp_name = f"{node.left.value}#{self.udo_params[-1][node.right.value]}"
             else:
                 comp_name = f"{node.left.value}#{node.right.value}"
-            ast_var_id = AST.VarID(
+            ast_var_id = AST.VarID(
+                value=comp_name,
+                line_start=node.right.line_start,
+                line_stop=node.right.line_stop,
+                column_start=node.right.column_start,
+                column_stop=node.right.column_stop,
+            )
             return self.visit(ast_var_id)
         left_operand = self.visit(node.left)
         right_operand = self.visit(node.right)
@@ -515,7 +557,10 @@ class InterpreterAnalyzer(ASTTemplate):
         # Setting here group by as we have already selected the identifiers we need
         grouping_op = "group by"

-
+        result = AGGREGATION_MAPPING[node.op].analyze(operand, grouping_op, groupings, having)
+        if not self.is_from_regular_aggregation:
+            result.name = VirtualCounter._new_ds_name()
+        return result

     def _format_having_expression_udo(self, having: str) -> str:
         if self.udo_params is None:
@@ -572,6 +617,8 @@ class InterpreterAnalyzer(ASTTemplate):
             for comp_name in node.partition_by:
                 if comp_name in self.udo_params[-1]:
                     partitioning.append(self.udo_params[-1][comp_name])
+                elif comp_name in operand.get_identifiers_names():
+                    partitioning.append(comp_name)
                 else:
                     raise SemanticError(
                         "2-3-9",
@@ -583,7 +630,7 @@ class InterpreterAnalyzer(ASTTemplate):
             for o in node.order_by:
                 if o.component in self.udo_params[-1]:
                     o.component = self.udo_params[-1][o.component]
-
+                elif o.component not in operand.get_identifiers_names():
                     raise SemanticError(
                         "2-3-9",
                         comp_type="Component",
@@ -749,10 +796,10 @@ class InterpreterAnalyzer(ASTTemplate):
         if self.is_from_join and node.value in self.datasets:
             return self.datasets[node.value]
         if self.regular_aggregation_dataset is not None:
-            if
+            if self.scalars is not None and node.value in self.scalars:
                 if node.value in self.regular_aggregation_dataset.components:
                     raise SemanticError("1-1-6-11", comp_name=node.value)
-                return self.
+                return self.scalars[node.value]
             if self.regular_aggregation_dataset.data is not None:
                 if (
                     self.is_from_join
@@ -778,7 +825,7 @@ class InterpreterAnalyzer(ASTTemplate):
                 )
             elif is_partial_present == 2:
                 raise SemanticError("1-1-13-9", comp_name=node.value)
-            node.value = found_comp
+            node.value = found_comp  # type:ignore[assignment]
             if node.value not in self.regular_aggregation_dataset.components:
                 raise SemanticError(
                     "1-1-1-10",
@@ -817,8 +864,11 @@ class InterpreterAnalyzer(ASTTemplate):
                 role=self.ruleset_dataset.components[comp_name].role,
                 nullable=self.ruleset_dataset.components[comp_name].nullable,
             )
+        if self.scalars and node.value in self.scalars:
+            return self.scalars[node.value]
         if node.value not in self.datasets:
             raise SemanticError("2-3-6", dataset_name=node.value)
+
         return self.datasets[node.value]

     def visit_Collection(self, node: AST.Collection) -> Any:
@@ -1273,6 +1323,10 @@ class InterpreterAnalyzer(ASTTemplate):
             signature_type=hr_info["node"].signature_type,
             element=hr_info["node"].element,
             rules=aux,
+            line_start=node.line_start,
+            line_stop=node.line_stop,
+            column_start=node.column_start,
+            column_stop=node.column_stop,
         )
         HRDAGAnalyzer().visit(hierarchy_ast)

@@ -1580,8 +1634,19 @@ class InterpreterAnalyzer(ASTTemplate):
                 )
             }
         )
+
+        if self.condition_stack and len(self.condition_stack) > 0:
+            last_condition_dataset = (
+                self.then_condition_dataset[-1]
+                if self.condition_stack[-1] == THEN_ELSE["then"]
+                else (self.else_condition_dataset[-1])
+            )
+            measure_name = last_condition_dataset.get_measures_names()[0]
+            then_data = then_data[then_data[name].isin(last_condition_dataset.data[measure_name])]
+            else_data = else_data[else_data[name].isin(last_condition_dataset.data[measure_name])]
         then_dataset = Dataset(name=name, components=components, data=then_data)
         else_dataset = Dataset(name=name, components=components, data=else_data)
+
         self.then_condition_dataset.append(then_dataset)
         self.else_condition_dataset.append(else_dataset)
@@ -1592,11 +1657,13 @@ class InterpreterAnalyzer(ASTTemplate):
             or self.condition_stack is None
         ):
             return left_operand, right_operand
+
         merge_dataset = (
             self.then_condition_dataset.pop()
             if self.condition_stack.pop() == THEN_ELSE["then"]
             else (self.else_condition_dataset.pop())
         )
+
         merge_index = merge_dataset.data[merge_dataset.get_measures_names()[0]].to_list()
         ids = merge_dataset.get_identifiers_names()
         if isinstance(left_operand, (Dataset, DataComponent)):
@@ -1873,8 +1940,20 @@ class InterpreterAnalyzer(ASTTemplate):
         return result

     def visit_TimeAggregation(self, node: AST.TimeAggregation) -> None:
-
-
+        if node.operand is not None:
+            operand = self.visit(node.operand)
+        else:
+            if self.aggregation_dataset is None:
+                raise SemanticError("1-1-19-11")
+            component_name = Time_Aggregation._get_time_id(self.aggregation_dataset)
+            ast_operand = VarID(
+                value=component_name,
+                line_start=node.line_start,
+                line_stop=node.line_stop,
+                column_start=node.column_start,
+                column_stop=node.column_stop,
+            )
+            operand = self.visit(ast_operand)
         return Time_Aggregation.analyze(
             operand=operand,
             period_from=node.period_from,
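Note: the rewritten visitor gives time_agg a fallback operand: when none is written explicitly, it builds a VarID for the time identifier of the dataset currently being aggregated. Time_Aggregation._get_time_id is not shown in this diff; a plausible reading is that it returns the name of the identifier component with a time type, as in this illustrative stand-in:

# Assumption: _get_time_id picks the identifier whose type is a time type.
# The dataset layout and type check below are illustrative only.
TIME_TYPES = {"Date", "TimePeriod", "TimeInterval"}

def get_time_id(components: dict) -> str:
    for name, comp in components.items():
        if comp["role"] == "Identifier" and comp["data_type"] in TIME_TYPES:
            return name
    raise ValueError("no time identifier found")

components = {
    "Id_1": {"role": "Identifier", "data_type": "String"},
    "Id_time": {"role": "Identifier", "data_type": "TimePeriod"},
    "Me_1": {"role": "Measure", "data_type": "Number"},
}
print(get_time_id(components))  # Id_time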
vtlengine/Model/__init__.py
CHANGED

@@ -1,3 +1,4 @@
+import inspect
 import json
 from collections import Counter
 from dataclasses import dataclass
@@ -128,10 +129,14 @@ class Component:
         )

     def to_dict(self) -> Dict[str, Any]:
+        data_type = self.data_type
+        if not inspect.isclass(self.data_type):
+            data_type = self.data_type.__class__  # type: ignore[assignment]
         return {
             "name": self.name,
-            "data_type": DataTypes.SCALAR_TYPES_CLASS_REVERSE[
-
+            "data_type": DataTypes.SCALAR_TYPES_CLASS_REVERSE[data_type],
+            # Need to check here for NoneType as UDO argument has it
+            "role": self.role.value if self.role is not None else None,  # type: ignore[redundant-expr]
             "nullable": self.nullable,
         }

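Note: the new inspect.isclass guard normalizes data_type before the reverse lookup: if a component carries an instance of a scalar type rather than the class itself, the instance's class becomes the dictionary key. A self-contained illustration; the Integer class and the mapping are stand-ins, not vtlengine's real types:

import inspect

class Integer:  # stand-in for a vtlengine scalar type
    pass

SCALAR_TYPES_CLASS_REVERSE = {Integer: "Integer"}

for data_type in (Integer, Integer()):  # sometimes a class, sometimes an instance
    if not inspect.isclass(data_type):
        data_type = data_type.__class__  # normalize instance -> class
    print(SCALAR_TYPES_CLASS_REVERSE[data_type])  # "Integer" both times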
@@ -229,11 +234,11 @@ class Dataset:
             self.data[comp.name] = self.data[comp.name].astype(str)
             other.data[comp.name] = other.data[comp.name].astype(str)
             self.data[comp.name] = self.data[comp.name].map(
-                lambda x: str(TimePeriodHandler(x)) if x != "" else "",
+                lambda x: str(TimePeriodHandler(str(x))) if x != "" else "",
                 na_action="ignore",
             )
             other.data[comp.name] = other.data[comp.name].map(
-                lambda x: str(TimePeriodHandler(x)) if x != "" else "",
+                lambda x: str(TimePeriodHandler(str(x))) if x != "" else "",
                 na_action="ignore",
             )
         elif type_name in ["Integer", "Number"]:
vtlengine/Operators/Aggregation.py
CHANGED

@@ -78,7 +78,7 @@ class Aggregation(Operator.Unary):
             data[measure.name] = (
                 data[measure.name]
                 .astype(object)
-                .map(lambda x: TimePeriodHandler(x), na_action="ignore")
+                .map(lambda x: TimePeriodHandler(str(x)), na_action="ignore")
             )
         else:
             data[measure.name] = data[measure.name].map(
@@ -90,7 +90,7 @@
                 data[measure.name]
                 .astype(object)
                 .map(
-                    lambda x: TimeIntervalHandler.from_iso_format(x),
+                    lambda x: TimeIntervalHandler.from_iso_format(str(x)),
                     na_action="ignore",
                 )
             )
@@ -103,11 +103,13 @@
         elif measure.data_type == Duration:
             if mode == "input":
                 data[measure.name] = data[measure.name].map(
-                    lambda x: PERIOD_IND_MAPPING[x],
+                    lambda x: PERIOD_IND_MAPPING[x],  # type: ignore[index]
+                    na_action="ignore",
                 )
             else:
                 data[measure.name] = data[measure.name].map(
-                    lambda x: PERIOD_IND_MAPPING_REVERSE[x],
+                    lambda x: PERIOD_IND_MAPPING_REVERSE[x],  # type: ignore[index]
+                    na_action="ignore",
                 )
         elif measure.data_type == Boolean:
             if mode == "result":
@@ -168,9 +170,14 @@
         for measure_name in operand.get_measures_names():
             result_components.pop(measure_name)
         new_comp = Component(
-            name="int_var",
+            name="int_var",
+            role=Role.MEASURE,
+            data_type=Integer,
+            nullable=True,
         )
         result_components["int_var"] = new_comp
+
+        # VDS is handled in visit_Aggregation
         return Dataset(name="result", components=result_components, data=None)

     @classmethod
@@ -208,7 +215,7 @@
             e = f'"{e}"'
         if cls.type_to_check is not None and cls.op != COUNT:
             functions += (
-                f"{cls.py_op}(CAST({e} AS
+                f"{cls.py_op}(CAST({e} AS DOUBLE)) AS {e}, "  # Count can only be one here
             )
         elif cls.op == COUNT:
             functions += f"{cls.py_op}({e}) AS int_var, "
vtlengine/Operators/Analytic.py
CHANGED

@@ -37,6 +37,7 @@ from vtlengine.DataTypes import (
 )
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, Dataset, Role
+from vtlengine.Utils.__Virtual_Assets import VirtualCounter

 return_integer_operators = [MAX, MIN, SUM]

@@ -157,8 +158,8 @@
             role=Role.MEASURE,
             nullable=nullable,
         )
-
-        return Dataset(name=
+        dataset_name = VirtualCounter._new_ds_name()
+        return Dataset(name=dataset_name, components=result_components, data=None)

     @classmethod
     def analyticfunc(
@@ -189,21 +190,26 @@
         if window is not None:
             mode = "ROWS" if window.type_ == "data" else "RANGE"
             start_mode = (
-                window.start_mode
-                if window.
+                window.start_mode.upper()
+                if (isinstance(window.start, int) and window.start != 0)
+                or (isinstance(window.start, str) and window.start == "unbounded")
                 else ""
             )
             stop_mode = (
-                window.stop_mode
-                if window.
+                window.stop_mode.upper()
+                if (isinstance(window.stop, int) and window.stop != 0)
+                or (isinstance(window.stop, str) and window.stop == "unbounded")
                 else ""
             )
-
-
-
-
-
-
+            start = (
+                "UNBOUNDED"
+                if window.start == "unbounded" or window.start == -1
+                else str(window.start)
+            )
+            stop = (
+                "CURRENT ROW" if window.stop == "current" or window.stop == 0 else str(window.stop)
+            )
+            window_str = f"{mode} BETWEEN {start} {start_mode} AND {stop} {stop_mode}"

         # Partitioning
         partition = "PARTITION BY " + ", ".join(partitioning) if len(partitioning) > 0 else ""
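Note: taken together, the rewritten branch converts a VTL window specification into a SQL frame clause. A standalone rerun of that logic with one sample window; SimpleNamespace stands in for the real window object, whose fields type_, start, stop, start_mode and stop_mode are visible above:

from types import SimpleNamespace

# Sample window: data points from unbounded preceding to the current data point.
w = SimpleNamespace(type_="data", start=-1, start_mode="preceding", stop=0, stop_mode="current")

mode = "ROWS" if w.type_ == "data" else "RANGE"
start_mode = w.start_mode.upper() if (isinstance(w.start, int) and w.start != 0) or w.start == "unbounded" else ""
stop_mode = w.stop_mode.upper() if (isinstance(w.stop, int) and w.stop != 0) or w.stop == "unbounded" else ""
start = "UNBOUNDED" if w.start in ("unbounded", -1) else str(w.start)
stop = "CURRENT ROW" if w.stop in ("current", 0) else str(w.stop)

print(f"{mode} BETWEEN {start} {start_mode} AND {stop} {stop_mode}")
# ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW  (empty stop_mode leaves a trailing space)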
@@ -224,7 +230,7 @@
         if cls.op == RANK:
             measure_query = f"{cls.sql_op}()"
         elif cls.op == RATIO_TO_REPORT:
-            measure_query = f"CAST({measure} AS
+            measure_query = f"CAST({measure} AS DOUBLE) / SUM(CAST({measure} AS DOUBLE))"
         elif cls.op in [LAG, LEAD]:
             measure_query = f"{cls.sql_op}({measure}, {','.join(map(str, params or []))})"
         else:
vtlengine/Operators/CastOperator.py
CHANGED

@@ -22,6 +22,7 @@ from vtlengine.DataTypes import (
 from vtlengine.DataTypes.TimeHandling import str_period_to_date
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
+from vtlengine.Utils.__Virtual_Assets import VirtualCounter

 duration_mapping = {"A": 6, "S": 5, "Q": 4, "M": 3, "W": 2, "D": 1}

@@ -331,7 +332,8 @@
             role=Role.MEASURE,
             nullable=measure.nullable,
         )
-
+        dataset_name = VirtualCounter._new_ds_name()
+        return Dataset(name=dataset_name, components=result_components, data=None)

     @classmethod
     def component_validation(  # type: ignore[override]
@@ -346,7 +348,8 @@

         from_type = operand.data_type
         cls.check_cast(from_type, to_type, mask)
-
+        comp_name = VirtualCounter._new_dc_name()
+        return DataComponent(name=comp_name, data=None, data_type=to_type, role=operand.role)

     @classmethod
     def scalar_validation(  # type: ignore[override]
vtlengine/Operators/Clause.py
CHANGED

@@ -15,6 +15,7 @@ from vtlengine.DataTypes import (
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
 from vtlengine.Operators import Operator
+from vtlengine.Utils.__Virtual_Assets import VirtualCounter


 class Calc(Operator):
@@ -23,7 +24,8 @@ class Calc(Operator):
     @classmethod
     def validate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
         result_components = {name: copy(comp) for name, comp in dataset.components.items()}
-
+        dataset_name = VirtualCounter._new_ds_name()
+        result_dataset = Dataset(name=dataset_name, components=result_components, data=None)

         for operand in operands:
             if operand.name in result_dataset.components:
@@ -70,7 +72,8 @@ class Aggregate(Operator):

     @classmethod
     def validate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
-
+        dataset_name = VirtualCounter._new_ds_name()
+        result_dataset = Dataset(name=dataset_name, components=dataset.components, data=None)

         for operand in operands:
             if operand.name in dataset.get_identifiers_names() or (
@@ -122,7 +125,8 @@ class Filter(Operator):
     def validate(cls, condition: DataComponent, dataset: Dataset) -> Dataset:
         if condition.data_type != Boolean:
             raise ValueError(f"Filter condition must be of type {Boolean}")
-
+        dataset_name = VirtualCounter._new_ds_name()
+        return Dataset(name=dataset_name, components=dataset.components, data=None)

     @classmethod
     def evaluate(cls, condition: DataComponent, dataset: Dataset) -> Dataset:
@@ -139,19 +143,20 @@ class Keep(Operator):

     @classmethod
     def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
+        dataset_name = VirtualCounter._new_ds_name()
         for operand in operands:
             if operand not in dataset.get_components_names():
                 raise SemanticError(
-                    "1-1-1-10", op=cls.op, comp_name=operand, dataset_name=
+                    "1-1-1-10", op=cls.op, comp_name=operand, dataset_name=dataset_name
                 )
             if dataset.get_component(operand).role == Role.IDENTIFIER:
-                raise SemanticError("1-1-6-2", op=cls.op, name=operand, dataset=
+                raise SemanticError("1-1-6-2", op=cls.op, name=operand, dataset=dataset_name)
         result_components = {
             name: comp
             for name, comp in dataset.components.items()
             if comp.name in operands or comp.role == Role.IDENTIFIER
         }
-        return Dataset(name=
+        return Dataset(name=dataset_name, components=result_components, data=None)

     @classmethod
     def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
@@ -170,17 +175,18 @@ class Drop(Operator):

     @classmethod
     def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
+        dataset_name = VirtualCounter._new_ds_name()
         for operand in operands:
             if operand not in dataset.components:
-                raise SemanticError("1-1-1-10", comp_name=operand, dataset_name=
+                raise SemanticError("1-1-1-10", comp_name=operand, dataset_name=dataset_name)
             if dataset.get_component(operand).role == Role.IDENTIFIER:
-                raise SemanticError("1-1-6-2", op=cls.op, name=operand, dataset=
+                raise SemanticError("1-1-6-2", op=cls.op, name=operand, dataset=dataset_name)
         if len(dataset.components) == len(operands):
             raise SemanticError("1-1-6-12", op=cls.op)
         result_components = {
             name: comp for name, comp in dataset.components.items() if comp.name not in operands
         }
-        return Dataset(name=
+        return Dataset(name=dataset_name, components=result_components, data=None)

     @classmethod
     def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
@@ -195,6 +201,7 @@ class Rename(Operator):

     @classmethod
     def validate(cls, operands: List[RenameNode], dataset: Dataset) -> Dataset:
+        dataset_name = VirtualCounter._new_ds_name()
         from_names = [operand.old_name for operand in operands]
         if len(from_names) != len(set(from_names)):
             duplicates = set([name for name in from_names if from_names.count(name) > 1])
@@ -211,14 +218,14 @@
                     "1-1-1-10",
                     op=cls.op,
                     comp_name=operand.old_name,
-                    dataset_name=
+                    dataset_name=dataset_name,
                 )
             if operand.new_name in dataset.components:
                 raise SemanticError(
                     "1-1-6-8",
                     op=cls.op,
                     comp_name=operand.new_name,
-                    dataset_name=
+                    dataset_name=dataset_name,
                 )

         result_components = {comp.name: comp for comp in dataset.components.values()}
@@ -230,8 +237,7 @@
                 nullable=result_components[operand.old_name].nullable,
             )
             del result_components[operand.old_name]
-
-        return Dataset(name=dataset.name, components=result_components, data=None)
+        return Dataset(name=dataset_name, components=result_components, data=None)

     @classmethod
     def evaluate(cls, operands: List[RenameNode], dataset: Dataset) -> Dataset:
@@ -256,6 +262,7 @@ class Pivot(Operator):
 class Unpivot(Operator):
     @classmethod
     def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
+        dataset_name = VirtualCounter._new_ds_name()
         if len(operands) != 2:
             raise ValueError("Unpivot clause requires two operands")
         identifier, measure = operands
@@ -263,10 +270,10 @@
         if len(dataset.get_identifiers()) < 1:
             raise SemanticError("1-3-27", op=cls.op)
         if identifier in dataset.components:
-            raise SemanticError("1-1-6-2", op=cls.op, name=identifier, dataset=
+            raise SemanticError("1-1-6-2", op=cls.op, name=identifier, dataset=dataset_name)

         result_components = {comp.name: comp for comp in dataset.get_identifiers()}
-        result_dataset = Dataset(name=
+        result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
         # noinspection PyTypeChecker
         result_dataset.add_component(
             Component(name=identifier, data_type=String, role=Role.IDENTIFIER, nullable=False)
@@ -306,6 +313,7 @@ class Sub(Operator):

     @classmethod
     def validate(cls, operands: List[DataComponent], dataset: Dataset) -> Dataset:
+        dataset_name = VirtualCounter._new_ds_name()
         if len(dataset.get_identifiers()) < 1:
             raise SemanticError("1-3-27", op=cls.op)
         for operand in operands:
@@ -314,14 +322,14 @@
                     "1-1-1-10",
                     op=cls.op,
                     comp_name=operand.name,
-                    dataset_name=
+                    dataset_name=dataset_name,
                 )
             if operand.role != Role.IDENTIFIER:
                 raise SemanticError(
                     "1-1-6-10",
                     op=cls.op,
                     operand=operand.name,
-                    dataset_name=
+                    dataset_name=dataset_name,
                 )
             if isinstance(operand, Scalar):
                 raise SemanticError("1-1-6-5", op=cls.op, name=operand.name)
@@ -331,7 +339,7 @@
             for name, comp in dataset.components.items()
             if comp.name not in [operand.name for operand in operands]
         }
-        return Dataset(name=
+        return Dataset(name=dataset_name, components=result_components, data=None)

     @classmethod
     def evaluate(cls, operands: List[DataComponent], dataset: Dataset) -> Dataset:
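Note: the same pattern repeats across every validator in Clause.py above and continues in Comparison.py below: results are named through VirtualCounter._new_ds_name() rather than reusing the input dataset's name, so chained clauses produce distinct intermediate identities. A toy demonstration using the hypothetical counter sketched earlier (names illustrative):

# Minimal stand-in mirroring the earlier VirtualCounter sketch.
class VirtualCounter:
    ds_count = 0

    @classmethod
    def _new_ds_name(cls):
        cls.ds_count += 1
        return f"@VDS_{cls.ds_count}"

    @classmethod
    def reset(cls):
        cls.ds_count = 0

names = [VirtualCounter._new_ds_name() for _ in range(3)]
print(names)  # ['@VDS_1', '@VDS_2', '@VDS_3'] -- distinct names for chained clause results

VirtualCounter.reset()  # the interpreter calls reset() after each statement
print(VirtualCounter._new_ds_name())  # numbering restarts: '@VDS_1'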
vtlengine/Operators/Comparison.py
CHANGED

@@ -25,6 +25,7 @@ from vtlengine.AST.Grammar.tokens import (
 from vtlengine.DataTypes import COMP_NAME_MAPPING, Boolean, Null, Number, String
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
+from vtlengine.Utils.__Virtual_Assets import VirtualCounter


 class Unary(Operator.Unary):
@@ -403,6 +404,7 @@ class ExistIn(Operator.Operator):
     def validate(
         cls, dataset_1: Dataset, dataset_2: Dataset, retain_element: Optional[Boolean]
     ) -> Any:
+        dataset_name = VirtualCounter._new_ds_name()
         left_identifiers = dataset_1.get_identifiers_names()
         right_identifiers = dataset_2.get_identifiers_names()

@@ -412,7 +414,7 @@
             raise ValueError("Datasets must have common identifiers")

         result_components = {comp.name: copy(comp) for comp in dataset_1.get_identifiers()}
-        result_dataset = Dataset(name=
+        result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
         result_dataset.add_component(
             Component(name="bool_var", data_type=Boolean, role=Role.MEASURE, nullable=False)
         )