vtlengine 1.0.3rc2__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +55 -20
- vtlengine/API/__init__.py +11 -2
- vtlengine/API/data/schema/json_schema_2.1.json +116 -0
- vtlengine/AST/ASTConstructor.py +5 -4
- vtlengine/AST/ASTConstructorModules/Expr.py +47 -48
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +45 -23
- vtlengine/AST/ASTConstructorModules/Terminals.py +21 -11
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/DAG/__init__.py +0 -3
- vtlengine/AST/Grammar/lexer.py +0 -1
- vtlengine/AST/Grammar/parser.py +185 -440
- vtlengine/AST/VtlVisitor.py +0 -1
- vtlengine/DataTypes/TimeHandling.py +50 -15
- vtlengine/DataTypes/__init__.py +79 -7
- vtlengine/Exceptions/__init__.py +3 -5
- vtlengine/Exceptions/messages.py +65 -105
- vtlengine/Interpreter/__init__.py +83 -38
- vtlengine/Model/__init__.py +7 -9
- vtlengine/Operators/Aggregation.py +13 -7
- vtlengine/Operators/Analytic.py +48 -9
- vtlengine/Operators/Assignment.py +0 -1
- vtlengine/Operators/CastOperator.py +44 -44
- vtlengine/Operators/Clause.py +16 -10
- vtlengine/Operators/Comparison.py +20 -12
- vtlengine/Operators/Conditional.py +30 -13
- vtlengine/Operators/General.py +9 -4
- vtlengine/Operators/HROperators.py +4 -14
- vtlengine/Operators/Join.py +15 -14
- vtlengine/Operators/Numeric.py +32 -26
- vtlengine/Operators/RoleSetter.py +6 -2
- vtlengine/Operators/Set.py +12 -8
- vtlengine/Operators/String.py +9 -9
- vtlengine/Operators/Time.py +136 -116
- vtlengine/Operators/Validation.py +10 -4
- vtlengine/Operators/__init__.py +56 -69
- vtlengine/Utils/__init__.py +6 -1
- vtlengine/files/output/__init__.py +0 -1
- vtlengine/files/output/_time_period_representation.py +2 -1
- vtlengine/files/parser/__init__.py +44 -10
- vtlengine/files/parser/_rfc_dialect.py +1 -1
- vtlengine/files/parser/_time_checking.py +4 -4
- {vtlengine-1.0.3rc2.dist-info → vtlengine-1.0.4.dist-info}/METADATA +9 -7
- vtlengine-1.0.4.dist-info/RECORD +58 -0
- {vtlengine-1.0.3rc2.dist-info → vtlengine-1.0.4.dist-info}/WHEEL +1 -1
- vtlengine/DataTypes/NumericTypesHandling.py +0 -38
- vtlengine-1.0.3rc2.dist-info/RECORD +0 -58
- {vtlengine-1.0.3rc2.dist-info → vtlengine-1.0.4.dist-info}/LICENSE.md +0 -0
vtlengine/Interpreter/__init__.py
CHANGED

@@ -1,7 +1,7 @@
 from copy import copy, deepcopy
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Type, Union

 import pandas as pd

@@ -68,10 +68,19 @@ from vtlengine.Operators.CastOperator import Cast
 from vtlengine.Operators.Comparison import Between, ExistIn
 from vtlengine.Operators.Conditional import Case, If
 from vtlengine.Operators.General import Eval
-from vtlengine.Operators.HROperators import
+from vtlengine.Operators.HROperators import (
+    HAAssignment,
+    Hierarchy,
+    get_measure_from_dataset,
+)
 from vtlengine.Operators.Numeric import Round, Trunc
 from vtlengine.Operators.String import Instr, Replace, Substr
-from vtlengine.Operators.Time import
+from vtlengine.Operators.Time import (
+    Current_Date,
+    Date_Add,
+    Fill_time_series,
+    Time_Aggregation,
+)
 from vtlengine.Operators.Validation import Check, Check_Datapoint, Check_Hierarchy
 from vtlengine.Utils import (
     AGGREGATION_MAPPING,
@@ -153,7 +162,9 @@ class InterpreterAnalyzer(ASTTemplate):
         for ds_name in self.ds_analysis[INSERT][statement_num]:
             if ds_name in self.datapoints_paths:
                 self.datasets[ds_name].data = load_datapoints(
-                    self.datasets[ds_name].components,
+                    self.datasets[ds_name].components,
+                    ds_name,
+                    self.datapoints_paths[ds_name],
                 )
             elif ds_name in self.datasets and self.datasets[ds_name].data is None:
                 _fill_dataset_empty_data(self.datasets[ds_name])
@@ -180,7 +191,9 @@ class InterpreterAnalyzer(ASTTemplate):

             # Saving only datasets, no scalars
             save_datapoints(
-                self.time_period_representation,
+                self.time_period_representation,
+                self.datasets[ds_name],
+                self.output_path,
             )
             self.datasets[ds_name].data = None

@@ -201,8 +214,9 @@ class InterpreterAnalyzer(ASTTemplate):
             if isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
                 vtlengine.Exceptions.dataset_output = child.left.value  # type: ignore[attr-defined]
                 self._load_datapoints_efficient(statement_num)
-            if
-
+            if not isinstance(
+                child, (AST.HRuleset, AST.DPRuleset, AST.Operator)
+            ) and not isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
                 raise SemanticError("1-3-17")
             result = self.visit(child)

@@ -229,15 +243,14 @@ class InterpreterAnalyzer(ASTTemplate):
     # Definition Language

     def visit_Operator(self, node: AST.Operator) -> None:
-
         if self.udos is None:
             self.udos = {}
         elif node.op in self.udos:
             raise ValueError(f"User Defined Operator {node.op} already exists")

-        param_info = []
+        param_info: List[Dict[str, Union[str, Type[ScalarType], AST.AST]]] = []
         for param in node.parameters:
-            if param.name in param_info:
+            if param.name in [x["name"] for x in param_info]:
                 raise ValueError(f"Duplicated Parameter {param.name} in UDO {node.op}")
             # We use a string for model types, but the data type class for basic types
             # (Integer, Number, String, Boolean, ...)
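The param_info fix above addresses a real bug: the old code asked whether a parameter name was `in param_info`, a list of dicts, so the duplicate check could never fire. A minimal standalone sketch of the corrected check (function and variable names here are hypothetical):

    from typing import Any, Dict, List

    def collect_params(names: List[str]) -> List[Dict[str, Any]]:
        param_info: List[Dict[str, Any]] = []
        for name in names:
            # Compare against the "name" keys, not the dicts themselves.
            if name in [p["name"] for p in param_info]:
                raise ValueError(f"Duplicated Parameter {name}")
            param_info.append({"name": name})
        return param_info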
@@ -262,7 +275,6 @@ class InterpreterAnalyzer(ASTTemplate):
         }

     def visit_DPRuleset(self, node: AST.DPRuleset) -> None:
-
         # Rule names are optional, if not provided, they are generated.
         # If provided, all must be provided
         rule_names = [rule.name for rule in node.rules if rule.name is not None]
@@ -358,7 +370,6 @@ class InterpreterAnalyzer(ASTTemplate):
         return self.visit_Assignment(node)

     def visit_BinOp(self, node: AST.BinOp) -> Any:
-
         is_from_if = False
         if (
             not self.is_from_condition
@@ -369,8 +380,12 @@ class InterpreterAnalyzer(ASTTemplate):
         is_from_if = self.is_from_if
         self.is_from_if = False

-        if (
-
+        if (
+            self.is_from_join
+            and node.op in [MEMBERSHIP, AGGREGATE]
+            and hasattr(node.left, "value")
+            and hasattr(node.right, "value")
+        ):
             if self.udo_params is not None and node.right.value in self.udo_params[-1]:
                 comp_name = f"{node.left.value}#{self.udo_params[-1][node.right.value]}"
             else:
@@ -421,7 +436,10 @@ class InterpreterAnalyzer(ASTTemplate):
         if node.operand is not None and operand is not None:
             op_comp: DataComponent = self.visit(node.operand)
             comps_to_keep = {}
-            for
+            for (
+                comp_name,
+                comp,
+            ) in self.regular_aggregation_dataset.components.items():
                 if comp.role == Role.IDENTIFIER:
                     comps_to_keep[comp_name] = copy(comp)
             comps_to_keep[op_comp.name] = Component(
@@ -744,8 +762,12 @@ class InterpreterAnalyzer(ASTTemplate):
         is_partial_present = 0
         found_comp = None
         for comp_name in self.regular_aggregation_dataset.get_components_names():
-            if (
-
+            if (
+                "#" in comp_name
+                and comp_name.split("#")[1] == node.value
+                or "#" in node.value
+                and node.value.split("#")[1] == comp_name
+            ):
                 is_partial_present += 1
                 found_comp = comp_name
         if is_partial_present == 0:
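The widened condition above matches '#'-qualified component names in either direction ("ds#comp" against "comp" and vice versa). Isolated as a sketch, it relies on Python's precedence where `and` binds tighter than `or`, just as in the diff:

    def partially_matches(comp_name: str, value: str) -> bool:
        return (
            "#" in comp_name and comp_name.split("#")[1] == value
            or "#" in value and value.split("#")[1] == comp_name
        )

    assert partially_matches("DS_1#Me_1", "Me_1")
    assert partially_matches("Me_1", "DS_1#Me_1")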
@@ -783,7 +805,9 @@ class InterpreterAnalyzer(ASTTemplate):
         comp_name = self.ruleset_signature[node.value]
         if comp_name not in self.ruleset_dataset.components:
             raise SemanticError(
-                "1-1-1-10",
+                "1-1-1-10",
+                comp_name=node.value,
+                dataset_name=self.ruleset_dataset.name,
             )
         data = None if self.rule_data is None else self.rule_data[comp_name]
         return DataComponent(
@@ -938,7 +962,6 @@ class InterpreterAnalyzer(ASTTemplate):
         return REGULAR_AGGREGATION_MAPPING[node.op].analyze(operands, dataset)

     def visit_If(self, node: AST.If) -> Dataset:
-
         self.is_from_condition = True
         condition = self.visit(node.condition)
         self.is_from_condition = False
@@ -948,7 +971,10 @@ class InterpreterAnalyzer(ASTTemplate):
             elseValue = self.visit(node.elseOp)
             if not isinstance(thenValue, Scalar) or not isinstance(elseValue, Scalar):
                 raise SemanticError(
-                    "1-1-9-3",
+                    "1-1-9-3",
+                    op="If_op",
+                    then_name=thenValue.name,
+                    else_name=elseValue.name,
                 )
         if condition.value:
             return self.visit(node.thenOp)
@@ -1024,7 +1050,9 @@ class InterpreterAnalyzer(ASTTemplate):

     def visit_Constant(self, node: AST.Constant) -> Any:
         return Scalar(
-            name=str(node.value),
+            name=str(node.value),
+            value=node.value,
+            data_type=BASIC_TYPES[type(node.value)],
         )

     def visit_JoinOp(self, node: AST.JoinOp) -> None:
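visit_Constant now also passes value and data_type, looking the scalar type up by the literal's exact Python type. The real mapping lives in vtlengine.DataTypes; the BASIC_TYPES below is an illustrative stand-in only:

    # Hypothetical stand-ins for vtlengine's scalar type classes.
    class IntegerType: ...
    class NumberType: ...
    class StringType: ...
    class BooleanType: ...

    BASIC_TYPES = {int: IntegerType, float: NumberType, str: StringType, bool: BooleanType}

    def constant_type(value):
        # Keying on type(value) keeps bool distinct from int, since
        # type(True) is exactly bool even though bool subclasses int.
        return BASIC_TYPES[type(value)]

    print(constant_type(True).__name__)  # BooleanType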
@@ -1130,7 +1158,9 @@ class InterpreterAnalyzer(ASTTemplate):
         for comp_name in node.children[2:]:
             if comp_name.__str__() not in dataset_element.components:
                 raise SemanticError(
-                    "1-1-1-10",
+                    "1-1-1-10",
+                    comp_name=comp_name,
+                    dataset_name=dataset_element.name,
                 )
         if dpr_info is not None and dpr_info["signature_type"] == "variable":
             for i, comp_name in enumerate(node.children[2:]):
@@ -1164,7 +1194,9 @@ class InterpreterAnalyzer(ASTTemplate):

             # Datapoint Ruleset final evaluation
             return Check_Datapoint.analyze(
-                dataset_element=dataset_element,
+                dataset_element=dataset_element,
+                rule_info=rule_output_values,
+                output=output,
             )
         elif node.op in (CHECK_HIERARCHY, HIERARCHY):
             if len(node.children) == 3:
@@ -1203,7 +1235,10 @@ class InterpreterAnalyzer(ASTTemplate):
                 and hr_info["signature"] != component
             ):
                 raise SemanticError(
-                    "1-1-10-3",
+                    "1-1-10-3",
+                    op=node.op,
+                    found=component,
+                    expected=hr_info["signature"],
                 )
             elif hr_info["node"].signature_type == "valuedomain" and component is None:
                 raise SemanticError("1-1-10-4", op=node.op)
@@ -1215,7 +1250,10 @@ class InterpreterAnalyzer(ASTTemplate):
                 and cond_components[i] != cond_comp
             ):
                 raise SemanticError(
-                    "1-1-10-6",
+                    "1-1-10-6",
+                    op=node.op,
+                    expected=cond_comp,
+                    found=cond_components[i],
                 )
             cond_info[cond_comp] = cond_components[i]

@@ -1270,7 +1308,9 @@ class InterpreterAnalyzer(ASTTemplate):
         # Final evaluation
         if node.op == CHECK_HIERARCHY:
             result = Check_Hierarchy.analyze(
-                dataset_element=dataset,
+                dataset_element=dataset,
+                rule_info=rule_output_values,
+                output=output,
             )
             del rule_output_values
         else:
@@ -1396,10 +1436,12 @@ class InterpreterAnalyzer(ASTTemplate):
             left_operand.data = pd.DataFrame({measure_name: []})
         if right_operand.data is None:
             right_operand.data = pd.DataFrame({measure_name: []})
-        left_null_indexes = set(
-            measure_name].isnull()].index
-
-
+        left_null_indexes = set(
+            left_operand.data[left_operand.data[measure_name].isnull()].index
+        )
+        right_null_indexes = set(
+            right_operand.data[right_operand.data[measure_name].isnull()].index
+        )
         # If no indexes are in common, then one datapoint is not null
         invalid_indexes = list(left_null_indexes.intersection(right_null_indexes))
         if len(invalid_indexes) > 0:
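The hunk above builds, for each operand, the set of row indexes whose measure is null, then treats only the intersection as invalid. A self-contained pandas sketch of the same idea:

    import pandas as pd

    left = pd.DataFrame({"Me_1": [1.0, None, None]})
    right = pd.DataFrame({"Me_1": [None, 2.0, None]})

    left_nulls = set(left[left["Me_1"].isnull()].index)
    right_nulls = set(right[right["Me_1"].isnull()].index)

    # Only rows that are null on *both* sides are flagged.
    print(sorted(left_nulls.intersection(right_nulls)))  # [2]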
@@ -1415,7 +1457,6 @@ class InterpreterAnalyzer(ASTTemplate):
         return HR_UNARY_MAPPING[node.op].analyze(operand)

     def visit_Validation(self, node: AST.Validation) -> Dataset:
-
         validation_element = self.visit(node.validation)
         if not isinstance(validation_element, Dataset):
             raise ValueError(f"Expected dataset, got {type(validation_element).__name__}")
@@ -1532,7 +1573,10 @@ class InterpreterAnalyzer(ASTTemplate):
         components.update(
             {
                 name: Component(
-                    name=name,
+                    name=name,
+                    data_type=BASIC_TYPES[int],
+                    role=Role.MEASURE,
+                    nullable=True,
                 )
             }
         )
@@ -1555,7 +1599,7 @@ class InterpreterAnalyzer(ASTTemplate):
         )
         merge_index = merge_dataset.data[merge_dataset.get_measures_names()[0]].to_list()
         ids = merge_dataset.get_identifiers_names()
-        if isinstance(left_operand, Dataset
+        if isinstance(left_operand, (Dataset, DataComponent)):
             if left_operand.data is None:
                 return left_operand, right_operand
             if isinstance(left_operand, Dataset):
@@ -1571,7 +1615,7 @@ class InterpreterAnalyzer(ASTTemplate):
             else:
                 left = left_operand.data
             left_operand.data = left.reindex(merge_index, fill_value=None)
-        if isinstance(right_operand, Dataset
+        if isinstance(right_operand, (Dataset, DataComponent)):
             if right_operand.data is None:
                 return left_operand, right_operand
             if isinstance(right_operand, Dataset):
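Both isinstance fixes let DataComponent operands take the same alignment path as Datasets. The alignment itself rests on pandas reindex, which materializes rows missing from the merge index as nulls, roughly:

    import pandas as pd

    s = pd.Series([10, 20], index=[0, 1])
    merge_index = [0, 1, 2]

    # Row 2 is absent from the operand, so it appears as a null after
    # reindexing, keeping both operands aligned on the merge index.
    aligned = s.reindex(merge_index, fill_value=None)
    print(aligned.isnull().tolist())  # [False, False, True]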
@@ -1737,9 +1781,7 @@ class InterpreterAnalyzer(ASTTemplate):
                 signature_values[param["name"]] = self.visit(node.params[i])
             elif param["type"] in ["Dataset", "Component"]:
                 if isinstance(node.params[i], AST.VarID):
-                    signature_values[param["name"]] = node.params[
-                        i
-                    ].value  # type: ignore[attr-defined]
+                    signature_values[param["name"]] = node.params[i].value  # type: ignore[attr-defined]
                 else:
                     param_element = self.visit(node.params[i])
                     if isinstance(param_element, Dataset):
@@ -1834,5 +1876,8 @@ class InterpreterAnalyzer(ASTTemplate):
         operand = self.visit(node.operand)

         return Time_Aggregation.analyze(
-            operand=operand,
+            operand=operand,
+            period_from=node.period_from,
+            period_to=node.period_to,
+            conf=node.conf,
         )
vtlengine/Model/__init__.py
CHANGED

@@ -229,23 +229,21 @@ class Dataset:
             self.data[comp.name] = self.data[comp.name].astype(str)
             other.data[comp.name] = other.data[comp.name].astype(str)
             self.data[comp.name] = self.data[comp.name].map(
-                lambda x: str(TimePeriodHandler(x)) if x != "" else "",
+                lambda x: str(TimePeriodHandler(x)) if x != "" else "",
+                na_action="ignore",
             )
             other.data[comp.name] = other.data[comp.name].map(
-                lambda x: str(TimePeriodHandler(x)) if x != "" else "",
+                lambda x: str(TimePeriodHandler(x)) if x != "" else "",
+                na_action="ignore",
             )
         elif type_name in ["Integer", "Number"]:
             type_ = "int64" if type_name == "Integer" else "float32"
             # We use here a number to avoid errors on equality on empty strings
             self.data[comp.name] = (
-                self.data[comp.name]
-                .replace("", -1234997)
-                .astype(type_)  # type: ignore[call-overload]
+                self.data[comp.name].replace("", -1234997).astype(type_)  # type: ignore[call-overload]
             )
             other.data[comp.name] = (
-                other.data[comp.name]
-                .replace("", -1234997)
-                .astype(type_)  # type: ignore[call-overload]
+                other.data[comp.name].replace("", -1234997).astype(type_)  # type: ignore[call-overload]
             )
             try:
                 assert_frame_equal(
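The na_action="ignore" arguments are the substantive change here: without them, map feeds nulls into the lambda and str(TimePeriodHandler(x)) raises on NaN. A sketch with .upper() standing in for the TimePeriodHandler round-trip:

    import pandas as pd

    s = pd.Series(["2022Q1", None, ""])

    # Nulls bypass the lambda entirely instead of raising inside it.
    out = s.map(lambda x: x.upper() if x != "" else "", na_action="ignore")
    print(out.tolist())  # ['2022Q1', None, '']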
@@ -334,7 +332,7 @@ class Dataset:
         return {
             "name": self.name,
             "components": {k: v.to_dict() for k, v in self.components.items()},
-            "data": self.data.to_dict(orient="records") if self.data is not None else None,
+            "data": (self.data.to_dict(orient="records") if self.data is not None else None),
         }

     def to_json(self) -> str:
vtlengine/Operators/Aggregation.py
CHANGED

@@ -29,8 +29,8 @@ from vtlengine.DataTypes import (
     unary_implicit_promotion,
 )
 from vtlengine.DataTypes.TimeHandling import (
-
-
+    PERIOD_IND_MAPPING,
+    PERIOD_IND_MAPPING_REVERSE,
     TimeIntervalHandler,
     TimePeriodHandler,
 )
@@ -89,7 +89,10 @@ class Aggregation(Operator.Unary):
             data[measure.name] = (
                 data[measure.name]
                 .astype(object)
-                .map(
+                .map(
+                    lambda x: TimeIntervalHandler.from_iso_format(x),
+                    na_action="ignore",
+                )
             )
         else:
             data[measure.name] = data[measure.name].map(
@@ -100,11 +103,11 @@ class Aggregation(Operator.Unary):
         elif measure.data_type == Duration:
             if mode == "input":
                 data[measure.name] = data[measure.name].map(
-                    lambda x:
+                    lambda x: PERIOD_IND_MAPPING[x], na_action="ignore"
                 )
             else:
                 data[measure.name] = data[measure.name].map(
-                    lambda x:
+                    lambda x: PERIOD_IND_MAPPING_REVERSE[x], na_action="ignore"
                 )
         elif measure.data_type == Boolean:
             if mode == "result":
@@ -128,7 +131,10 @@ class Aggregation(Operator.Unary):
         for comp_name in grouping_columns:
             if comp_name not in operand.components:
                 raise SemanticError(
-                    "1-1-1-10",
+                    "1-1-1-10",
+                    op=cls.op,
+                    comp_name=comp_name,
+                    dataset_name=operand.name,
                 )
             if operand.components[comp_name].role != Role.IDENTIFIER:
                 raise SemanticError(
@@ -223,7 +229,7 @@ class Aggregation(Operator.Unary):
         )

         try:
-            return duckdb.query(query).to_df()
+            return duckdb.query(query).to_df().astype(object)
         except RuntimeError as e:
             if "Conversion" in e.args[0]:
                 raise SemanticError("2-3-8", op=cls.op, msg=e.args[0].split(":")[-1])
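Appending .astype(object) after to_df() is the other recurring change: DuckDB returns NumPy-typed columns, while the engine keeps its data as dtype object, so results are loosened right at the boundary. For instance:

    import duckdb

    df = duckdb.query("SELECT 1 AS Me_1 UNION ALL SELECT 2").to_df()
    print(df.dtypes.tolist())  # NumPy dtype chosen by DuckDB, e.g. int32

    df = df.astype(object)
    print(df.dtypes.tolist())  # [dtype('O')]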
vtlengine/Operators/Analytic.py
CHANGED

@@ -29,10 +29,17 @@ from vtlengine.AST.Grammar.tokens import (
     VAR_POP,
     VAR_SAMP,
 )
-from vtlengine.DataTypes import
+from vtlengine.DataTypes import (
+    COMP_NAME_MAPPING,
+    Integer,
+    Number,
+    unary_implicit_promotion,
+)
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, Dataset, Role

+return_integer_operators = [MAX, MIN, SUM]
+

 # noinspection PyMethodOverriding
 class Analytic(Operator.Unary):
@@ -47,10 +54,11 @@ class Analytic(Operator.Unary):
     Evaluate: Ensures the type of data is the correct one to perform the Analytic operators.
     """

+    return_integer = None
     sql_op: Optional[str] = None

     @classmethod
-    def validate(  # type: ignore[override]
+    def validate(  # type: ignore[override] # noqa: C901
         cls,
         operand: Dataset,
         partitioning: List[str],
@@ -66,7 +74,10 @@ class Analytic(Operator.Unary):
         for comp_name in partitioning:
             if comp_name not in operand.components:
                 raise SemanticError(
-                    "1-1-1-10",
+                    "1-1-1-10",
+                    op=cls.op,
+                    comp_name=comp_name,
+                    dataset_name=operand.name,
                 )
             if comp_name not in identifier_names:
                 raise SemanticError(
@@ -78,14 +89,21 @@ class Analytic(Operator.Unary):
         for comp_name in order_components:
             if comp_name not in operand.components:
                 raise SemanticError(
-                    "1-1-1-10",
+                    "1-1-1-10",
+                    op=cls.op,
+                    comp_name=comp_name,
+                    dataset_name=operand.name,
                 )
         if component_name is not None:
             if cls.type_to_check is not None:
                 unary_implicit_promotion(
                     operand.components[component_name].data_type, cls.type_to_check
                 )
-
+
+            if cls.op in return_integer_operators:
+                cls.return_integer = isinstance(cls.return_type, Integer)
+
+            elif cls.return_type is not None:
                 result_components[component_name] = Component(
                     name=component_name,
                     data_type=cls.return_type,
@@ -106,14 +124,28 @@ class Analytic(Operator.Unary):
         measures = operand.get_measures()
         if len(measures) == 0:
             raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
+
+        if cls.op in return_integer_operators:
+            isNumber = False
+            for measure in measures:
+                isNumber |= isinstance(measure.data_type, Number)
+            cls.return_integer = not isNumber
+
         if cls.type_to_check is not None:
             for measure in measures:
                 unary_implicit_promotion(measure.data_type, cls.type_to_check)
-
+
+        if cls.op in return_integer_operators:
+            for measure in measures:
+                new_measure = copy(measure)
+                new_measure.data_type = Integer if cls.return_integer else Number
+                result_components[measure.name] = new_measure
+        elif cls.return_type is not None:
             for measure in measures:
                 new_measure = copy(measure)
                 new_measure.data_type = cls.return_type
                 result_components[measure.name] = new_measure
+
         if cls.op == COUNT and len(measures) <= 1:
             measure_name = COMP_NAME_MAPPING[cls.return_type]
             nullable = False if len(measures) == 0 else measures[0].nullable
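The validate logic above narrows MAX/MIN/SUM results to Integer only when no measure is typed Number; a single Number measure keeps the result as Number. Schematically, with type names as plain strings rather than vtlengine classes:

    def infer_return_integer(measure_types: list) -> bool:
        # Integer survives only if no measure is a (floating) Number.
        return not any(t == "Number" for t in measure_types)

    assert infer_return_integer(["Integer", "Integer"]) is True
    assert infer_return_integer(["Integer", "Number"]) is False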
@@ -199,6 +231,8 @@ class Analytic(Operator.Unary):
             measure_query = f"{cls.sql_op}({measure})"
             if cls.op == COUNT and len(measure_names) == 1:
                 measure_query += f" {analytic_str} as {COMP_NAME_MAPPING[cls.return_type]}"
+            elif cls.op in return_integer_operators and cls.return_integer:
+                measure_query = f"CAST({measure_query} {analytic_str} AS INTEGER) as {measure}"
             else:
                 measure_query += f" {analytic_str} as {measure}"
             measure_queries.append(measure_query)
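When return_integer is set, the generated window expression is wrapped in a CAST so DuckDB already hands back integers. A hypothetical mirror of the generated SQL, runnable against DuckDB directly:

    import duckdb

    query = """
    SELECT Id_1,
           CAST(SUM(Me_1) OVER (PARTITION BY Id_1) AS INTEGER) AS Me_1
    FROM (VALUES (1, 10), (1, 20), (2, 5)) AS t(Id_1, Me_1)
    """
    print(duckdb.query(query).to_df())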
@@ -215,7 +249,7 @@ class Analytic(Operator.Unary):
         df[measure_names] = df[measure_names].fillna(-1)
         # if os.getenv("SPARK", False):
         #     df = df.to_pandas()
-        return duckdb.query(query).to_df()
+        return duckdb.query(query).to_df().astype(object)

     @classmethod
     def evaluate(  # type: ignore[override]
@@ -245,6 +279,10 @@ class Analytic(Operator.Unary):
             window=window,
             params=params,
         )
+
+        # if cls.return_type == Integer:
+        #     result.data[measure_names] = result.data[measure_names].astype('Int64')
+
         return result
@@ -255,6 +293,7 @@ class Max(Analytic):

     op = MAX
     sql_op = "MAX"
+    return_integer = False


 class Min(Analytic):
@@ -264,6 +303,7 @@ class Min(Analytic):

     op = MIN
     sql_op = "MIN"
+    return_integer = False


 class Sum(Analytic):
@@ -272,9 +312,8 @@ class Sum(Analytic):
     """

     op = SUM
-    type_to_check = Number
-    return_type = Number
     sql_op = "SUM"
+    return_integer = False


 class Count(Analytic):