vtlengine 1.0.3rc3__py3-none-any.whl → 1.1__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry, and is provided for informational purposes only.
Potentially problematic release: this version of vtlengine has been flagged by the registry scanner.
- vtlengine/API/_InternalApi.py +288 -61
- vtlengine/API/__init__.py +269 -71
- vtlengine/API/data/schema/json_schema_2.1.json +116 -0
- vtlengine/AST/ASTComment.py +56 -0
- vtlengine/AST/ASTConstructor.py +76 -22
- vtlengine/AST/ASTConstructorModules/Expr.py +238 -120
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +126 -61
- vtlengine/AST/ASTConstructorModules/Terminals.py +97 -42
- vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- vtlengine/AST/ASTEncoders.py +5 -1
- vtlengine/AST/ASTString.py +608 -0
- vtlengine/AST/ASTTemplate.py +28 -2
- vtlengine/AST/DAG/__init__.py +10 -4
- vtlengine/AST/Grammar/lexer.py +0 -1
- vtlengine/AST/Grammar/parser.py +185 -440
- vtlengine/AST/VtlVisitor.py +0 -1
- vtlengine/AST/__init__.py +127 -14
- vtlengine/DataTypes/TimeHandling.py +50 -15
- vtlengine/DataTypes/__init__.py +79 -7
- vtlengine/Exceptions/__init__.py +3 -5
- vtlengine/Exceptions/messages.py +74 -105
- vtlengine/Interpreter/__init__.py +136 -46
- vtlengine/Model/__init__.py +14 -11
- vtlengine/Operators/Aggregation.py +17 -9
- vtlengine/Operators/Analytic.py +64 -20
- vtlengine/Operators/Assignment.py +0 -1
- vtlengine/Operators/CastOperator.py +44 -44
- vtlengine/Operators/Clause.py +16 -10
- vtlengine/Operators/Comparison.py +20 -12
- vtlengine/Operators/Conditional.py +47 -15
- vtlengine/Operators/General.py +9 -4
- vtlengine/Operators/HROperators.py +4 -14
- vtlengine/Operators/Join.py +15 -14
- vtlengine/Operators/Numeric.py +32 -26
- vtlengine/Operators/RoleSetter.py +6 -2
- vtlengine/Operators/Set.py +12 -8
- vtlengine/Operators/String.py +9 -9
- vtlengine/Operators/Time.py +145 -124
- vtlengine/Operators/Validation.py +10 -4
- vtlengine/Operators/__init__.py +56 -69
- vtlengine/Utils/__init__.py +55 -1
- vtlengine/__extras_check.py +17 -0
- vtlengine/__init__.py +2 -2
- vtlengine/files/output/__init__.py +2 -1
- vtlengine/files/output/_time_period_representation.py +2 -1
- vtlengine/files/parser/__init__.py +52 -46
- vtlengine/files/parser/_time_checking.py +4 -4
- {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1.dist-info}/METADATA +21 -17
- vtlengine-1.1.dist-info/RECORD +61 -0
- {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1.dist-info}/WHEEL +1 -1
- vtlengine/DataTypes/NumericTypesHandling.py +0 -38
- vtlengine-1.0.3rc3.dist-info/RECORD +0 -58
- {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1.dist-info}/LICENSE.md +0 -0
vtlengine/Interpreter/__init__.py CHANGED
@@ -1,16 +1,17 @@
 from copy import copy, deepcopy
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Type, Union

 import pandas as pd

 import vtlengine.AST as AST
 import vtlengine.Exceptions
 import vtlengine.Operators as Operators
+from vtlengine.AST import VarID
 from vtlengine.AST.ASTTemplate import ASTTemplate
 from vtlengine.AST.DAG import HRDAGAnalyzer
-from vtlengine.AST.DAG._words import DELETE, GLOBAL, INSERT
+from vtlengine.AST.DAG._words import DELETE, GLOBAL, INSERT, PERSISTENT
 from vtlengine.AST.Grammar.tokens import (
     AGGREGATE,
     ALL,
@@ -68,10 +69,19 @@ from vtlengine.Operators.CastOperator import Cast
 from vtlengine.Operators.Comparison import Between, ExistIn
 from vtlengine.Operators.Conditional import Case, If
 from vtlengine.Operators.General import Eval
-from vtlengine.Operators.HROperators import
+from vtlengine.Operators.HROperators import (
+    HAAssignment,
+    Hierarchy,
+    get_measure_from_dataset,
+)
 from vtlengine.Operators.Numeric import Round, Trunc
 from vtlengine.Operators.String import Instr, Replace, Substr
-from vtlengine.Operators.Time import
+from vtlengine.Operators.Time import (
+    Current_Date,
+    Date_Add,
+    Fill_time_series,
+    Time_Aggregation,
+)
 from vtlengine.Operators.Validation import Check, Check_Datapoint, Check_Hierarchy
 from vtlengine.Utils import (
     AGGREGATION_MAPPING,
@@ -104,6 +114,8 @@ class InterpreterAnalyzer(ASTTemplate):
     output_path: Optional[Union[str, Path]] = None
     # Time Period Representation
     time_period_representation: Optional[TimePeriodRepresentation] = None
+    # Return only persistent
+    return_only_persistent: bool = True
     # Flags to change behavior
     nested_condition: Union[str, bool] = False
     is_from_assignment: bool = False
@@ -153,7 +165,9 @@ class InterpreterAnalyzer(ASTTemplate):
         for ds_name in self.ds_analysis[INSERT][statement_num]:
             if ds_name in self.datapoints_paths:
                 self.datasets[ds_name].data = load_datapoints(
-                    self.datasets[ds_name].components,
+                    self.datasets[ds_name].components,
+                    ds_name,
+                    self.datapoints_paths[ds_name],
                 )
             elif ds_name in self.datasets and self.datasets[ds_name].data is None:
                 _fill_dataset_empty_data(self.datasets[ds_name])
@@ -177,10 +191,14 @@ class InterpreterAnalyzer(ASTTemplate):
                 # We do not save global input datasets, only results of transformations
                 self.datasets[ds_name].data = None
                 continue
-
+            if self.return_only_persistent and ds_name not in self.ds_analysis[PERSISTENT]:
+                self.datasets[ds_name].data = None
+                continue
             # Saving only datasets, no scalars
             save_datapoints(
-                self.time_period_representation,
+                self.time_period_representation,
+                self.datasets[ds_name],
+                self.output_path,
             )
             self.datasets[ds_name].data = None

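The `return_only_persistent` flag added above is the behavioural core of this release: results that are not persistent assignments in the DAG's PERSISTENT set are dropped before saving. A minimal sketch of how this surfaces through the public API, assuming `run()` in `vtlengine/API/__init__.py` forwards the flag to the interpreter and returns the computed datasets (dataset and component names are made up):

    import pandas as pd
    from vtlengine import run

    script = """
    DS_tmp := DS_1 * 10;   // temporary assignment
    DS_out <- DS_tmp + 1;  // persistent assignment
    """
    data_structures = {
        "datasets": [
            {
                "name": "DS_1",
                "DataStructure": [
                    {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                    {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
                ],
            }
        ]
    }
    datapoints = {"DS_1": pd.DataFrame({"Id_1": [1, 2], "Me_1": [10.0, 20.0]})}

    # With return_only_persistent=True, only DS_out should keep its datapoints.
    result = run(
        script=script,
        data_structures=data_structures,
        datapoints=datapoints,
        return_only_persistent=True,
    )
    print(result)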
@@ -201,8 +219,9 @@ class InterpreterAnalyzer(ASTTemplate):
             if isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
                 vtlengine.Exceptions.dataset_output = child.left.value  # type: ignore[attr-defined]
                 self._load_datapoints_efficient(statement_num)
-            if
-
+            if not isinstance(
+                child, (AST.HRuleset, AST.DPRuleset, AST.Operator)
+            ) and not isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
                 raise SemanticError("1-3-17")
             result = self.visit(child)
@@ -229,15 +248,14 @@ class InterpreterAnalyzer(ASTTemplate):
     # Definition Language

     def visit_Operator(self, node: AST.Operator) -> None:
-
         if self.udos is None:
             self.udos = {}
         elif node.op in self.udos:
             raise ValueError(f"User Defined Operator {node.op} already exists")

-        param_info = []
+        param_info: List[Dict[str, Union[str, Type[ScalarType], AST.AST]]] = []
         for param in node.parameters:
-            if param.name in param_info:
+            if param.name in [x["name"] for x in param_info]:
                 raise ValueError(f"Duplicated Parameter {param.name} in UDO {node.op}")
             # We use a string for model types, but the data type class for basic types
             # (Integer, Number, String, Boolean, ...)
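The duplicated-parameter change above is a real fix, not just typing: `param_info` holds dicts, so the old membership test compared a string against dicts and could never fire. Illustrated standalone with made-up values:

    param_info = [{"name": "x", "type": "Dataset"}]
    print("x" in param_info)                        # False: "x" is compared to dicts
    print("x" in [p["name"] for p in param_info])   # True: compared to the names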
@@ -262,7 +280,6 @@ class InterpreterAnalyzer(ASTTemplate):
         }

     def visit_DPRuleset(self, node: AST.DPRuleset) -> None:
-
         # Rule names are optional, if not provided, they are generated.
         # If provided, all must be provided
         rule_names = [rule.name for rule in node.rules if rule.name is not None]
@@ -323,7 +340,7 @@ class InterpreterAnalyzer(ASTTemplate):
         for i, rule in enumerate(node.rules):
             rule.name = (i + 1).__str__()

-        cond_comp = []
+        cond_comp: List[Any] = []
         if isinstance(node.element, list):
             cond_comp = [x.value for x in node.element[:-1]]
             node.element = node.element[-1]
@@ -357,8 +374,10 @@ class InterpreterAnalyzer(ASTTemplate):
     def visit_PersistentAssignment(self, node: AST.PersistentAssignment) -> Any:
         return self.visit_Assignment(node)

-    def
+    def visit_ParFunction(self, node: AST.ParFunction) -> Any:
+        return self.visit(node.operand)

+    def visit_BinOp(self, node: AST.BinOp) -> Any:
         is_from_if = False
         if (
             not self.is_from_condition
@@ -369,13 +388,23 @@ class InterpreterAnalyzer(ASTTemplate):
             is_from_if = self.is_from_if
             self.is_from_if = False

-        if (
-
+        if (
+            self.is_from_join
+            and node.op in [MEMBERSHIP, AGGREGATE]
+            and hasattr(node.left, "value")
+            and hasattr(node.right, "value")
+        ):
             if self.udo_params is not None and node.right.value in self.udo_params[-1]:
                 comp_name = f"{node.left.value}#{self.udo_params[-1][node.right.value]}"
             else:
                 comp_name = f"{node.left.value}#{node.right.value}"
-            ast_var_id = AST.VarID(
+            ast_var_id = AST.VarID(
+                value=comp_name,
+                line_start=node.right.line_start,
+                line_stop=node.right.line_stop,
+                column_start=node.right.column_start,
+                column_stop=node.right.column_stop,
+            )
             return self.visit(ast_var_id)
         left_operand = self.visit(node.left)
         right_operand = self.visit(node.right)
@@ -421,7 +450,10 @@ class InterpreterAnalyzer(ASTTemplate):
         if node.operand is not None and operand is not None:
             op_comp: DataComponent = self.visit(node.operand)
             comps_to_keep = {}
-            for comp_name, comp in self.regular_aggregation_dataset.components.items():
+            for (
+                comp_name,
+                comp,
+            ) in self.regular_aggregation_dataset.components.items():
                 if comp.role == Role.IDENTIFIER:
                     comps_to_keep[comp_name] = copy(comp)
             comps_to_keep[op_comp.name] = Component(
@@ -554,6 +586,8 @@ class InterpreterAnalyzer(ASTTemplate):
             for comp_name in node.partition_by:
                 if comp_name in self.udo_params[-1]:
                     partitioning.append(self.udo_params[-1][comp_name])
+                elif comp_name in operand.get_identifiers_names():
+                    partitioning.append(comp_name)
                 else:
                     raise SemanticError(
                         "2-3-9",
@@ -565,7 +599,7 @@ class InterpreterAnalyzer(ASTTemplate):
             for o in node.order_by:
                 if o.component in self.udo_params[-1]:
                     o.component = self.udo_params[-1][o.component]
-                else:
+                elif o.component not in operand.get_identifiers_names():
                     raise SemanticError(
                         "2-3-9",
                         comp_type="Component",
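In both analytic clauses the resolution order is now: UDO parameter substitution first, then the operand's own identifiers, and only then the 2-3-9 error. A standalone mirror of that logic (names are illustrative):

    udo_params = {"comp": "Id_1"}
    identifiers = ["Id_1", "Id_2"]

    def resolve(name):
        # 1) substitute UDO parameters, 2) accept operand identifiers, 3) fail
        if name in udo_params:
            return udo_params[name]
        if name in identifiers:
            return name
        raise ValueError(f"2-3-9: component {name} not found")

    print(resolve("comp"), resolve("Id_2"))  # Id_1 Id_2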
@@ -744,8 +778,12 @@ class InterpreterAnalyzer(ASTTemplate):
         is_partial_present = 0
         found_comp = None
         for comp_name in self.regular_aggregation_dataset.get_components_names():
-            if (
-
+            if (
+                "#" in comp_name
+                and comp_name.split("#")[1] == node.value
+                or "#" in node.value
+                and node.value.split("#")[1] == comp_name
+            ):
                 is_partial_present += 1
                 found_comp = comp_name
         if is_partial_present == 0:
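The widened condition matches a bare component name against a qualified `dataset#component` name in either direction (`and` binds tighter than `or`, so no extra parentheses are needed). Mirrored standalone:

    comp_name, value = "DS_1#Me_1", "Me_1"
    match = ("#" in comp_name and comp_name.split("#")[1] == value) or (
        "#" in value and value.split("#")[1] == comp_name
    )
    print(match)  # True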
@@ -756,7 +794,7 @@ class InterpreterAnalyzer(ASTTemplate):
             )
         elif is_partial_present == 2:
             raise SemanticError("1-1-13-9", comp_name=node.value)
-        node.value = found_comp
+        node.value = found_comp  # type:ignore[assignment]
         if node.value not in self.regular_aggregation_dataset.components:
             raise SemanticError(
                 "1-1-1-10",
@@ -783,7 +821,9 @@ class InterpreterAnalyzer(ASTTemplate):
             comp_name = self.ruleset_signature[node.value]
             if comp_name not in self.ruleset_dataset.components:
                 raise SemanticError(
-                    "1-1-1-10",
+                    "1-1-1-10",
+                    comp_name=node.value,
+                    dataset_name=self.ruleset_dataset.name,
                 )
             data = None if self.rule_data is None else self.rule_data[comp_name]
             return DataComponent(
@@ -938,7 +978,6 @@ class InterpreterAnalyzer(ASTTemplate):
         return REGULAR_AGGREGATION_MAPPING[node.op].analyze(operands, dataset)

     def visit_If(self, node: AST.If) -> Dataset:
-
         self.is_from_condition = True
         condition = self.visit(node.condition)
         self.is_from_condition = False
@@ -948,7 +987,10 @@ class InterpreterAnalyzer(ASTTemplate):
             elseValue = self.visit(node.elseOp)
             if not isinstance(thenValue, Scalar) or not isinstance(elseValue, Scalar):
                 raise SemanticError(
-                    "1-1-9-3",
+                    "1-1-9-3",
+                    op="If_op",
+                    then_name=thenValue.name,
+                    else_name=elseValue.name,
                 )
         if condition.value:
             return self.visit(node.thenOp)
@@ -1024,7 +1066,9 @@ class InterpreterAnalyzer(ASTTemplate):

     def visit_Constant(self, node: AST.Constant) -> Any:
         return Scalar(
-            name=str(node.value),
+            name=str(node.value),
+            value=node.value,
+            data_type=BASIC_TYPES[type(node.value)],
         )

     def visit_JoinOp(self, node: AST.JoinOp) -> None:
@@ -1130,7 +1174,9 @@ class InterpreterAnalyzer(ASTTemplate):
         for comp_name in node.children[2:]:
             if comp_name.__str__() not in dataset_element.components:
                 raise SemanticError(
-                    "1-1-1-10",
+                    "1-1-1-10",
+                    comp_name=comp_name,
+                    dataset_name=dataset_element.name,
                 )
         if dpr_info is not None and dpr_info["signature_type"] == "variable":
             for i, comp_name in enumerate(node.children[2:]):
@@ -1164,7 +1210,9 @@ class InterpreterAnalyzer(ASTTemplate):

         # Datapoint Ruleset final evaluation
         return Check_Datapoint.analyze(
-            dataset_element=dataset_element,
+            dataset_element=dataset_element,
+            rule_info=rule_output_values,
+            output=output,
         )
     elif node.op in (CHECK_HIERARCHY, HIERARCHY):
         if len(node.children) == 3:
@@ -1203,7 +1251,10 @@ class InterpreterAnalyzer(ASTTemplate):
                 and hr_info["signature"] != component
             ):
                 raise SemanticError(
-                    "1-1-10-3",
+                    "1-1-10-3",
+                    op=node.op,
+                    found=component,
+                    expected=hr_info["signature"],
                 )
             elif hr_info["node"].signature_type == "valuedomain" and component is None:
                 raise SemanticError("1-1-10-4", op=node.op)
@@ -1215,7 +1266,10 @@ class InterpreterAnalyzer(ASTTemplate):
                 and cond_components[i] != cond_comp
             ):
                 raise SemanticError(
-                    "1-1-10-6",
+                    "1-1-10-6",
+                    op=node.op,
+                    expected=cond_comp,
+                    found=cond_components[i],
                 )
             cond_info[cond_comp] = cond_components[i]
@@ -1235,6 +1289,10 @@ class InterpreterAnalyzer(ASTTemplate):
             signature_type=hr_info["node"].signature_type,
             element=hr_info["node"].element,
             rules=aux,
+            line_start=node.line_start,
+            line_stop=node.line_stop,
+            column_start=node.column_start,
+            column_stop=node.column_stop,
         )
         HRDAGAnalyzer().visit(hierarchy_ast)

@@ -1270,7 +1328,9 @@ class InterpreterAnalyzer(ASTTemplate):
         # Final evaluation
         if node.op == CHECK_HIERARCHY:
             result = Check_Hierarchy.analyze(
-                dataset_element=dataset,
+                dataset_element=dataset,
+                rule_info=rule_output_values,
+                output=output,
             )
             del rule_output_values
         else:
@@ -1396,10 +1456,12 @@ class InterpreterAnalyzer(ASTTemplate):
             left_operand.data = pd.DataFrame({measure_name: []})
         if right_operand.data is None:
             right_operand.data = pd.DataFrame({measure_name: []})
-        left_null_indexes = set(
-            measure_name].isnull()].index
-
-
+        left_null_indexes = set(
+            left_operand.data[left_operand.data[measure_name].isnull()].index
+        )
+        right_null_indexes = set(
+            right_operand.data[right_operand.data[measure_name].isnull()].index
+        )
         # If no indexes are in common, then one datapoint is not null
         invalid_indexes = list(left_null_indexes.intersection(right_null_indexes))
         if len(invalid_indexes) > 0:
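The rewritten block computes, per measure, the rows that are null on both sides of a hierarchical-rule comparison. The same intersection in isolation (toy frames):

    import pandas as pd

    left = pd.DataFrame({"Me_1": [1.0, None, None]})
    right = pd.DataFrame({"Me_1": [None, 2.0, None]})

    left_nulls = set(left[left["Me_1"].isnull()].index)
    right_nulls = set(right[right["Me_1"].isnull()].index)
    # Only rows null on BOTH sides are invalid.
    print(sorted(left_nulls.intersection(right_nulls)))  # [2]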
@@ -1415,7 +1477,6 @@ class InterpreterAnalyzer(ASTTemplate):
         return HR_UNARY_MAPPING[node.op].analyze(operand)

     def visit_Validation(self, node: AST.Validation) -> Dataset:
-
         validation_element = self.visit(node.validation)
         if not isinstance(validation_element, Dataset):
             raise ValueError(f"Expected dataset, got {type(validation_element).__name__}")
@@ -1532,12 +1593,26 @@ class InterpreterAnalyzer(ASTTemplate):
         components.update(
             {
                 name: Component(
-                    name=name,
+                    name=name,
+                    data_type=BASIC_TYPES[int],
+                    role=Role.MEASURE,
+                    nullable=True,
                 )
             }
         )
+
+        if self.condition_stack and len(self.condition_stack) > 0:
+            last_condition_dataset = (
+                self.then_condition_dataset[-1]
+                if self.condition_stack[-1] == THEN_ELSE["then"]
+                else (self.else_condition_dataset[-1])
+            )
+            measure_name = last_condition_dataset.get_measures_names()[0]
+            then_data = then_data[then_data[name].isin(last_condition_dataset.data[measure_name])]
+            else_data = else_data[else_data[name].isin(last_condition_dataset.data[measure_name])]
         then_dataset = Dataset(name=name, components=components, data=then_data)
         else_dataset = Dataset(name=name, components=components, data=else_data)
+
         self.then_condition_dataset.append(then_dataset)
         self.else_condition_dataset.append(else_dataset)

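The added block makes nested if/then/else conditions compose: rows entering an inner branch are first filtered, via isin on the outer branch's measure, to those the enclosing branch selected. The pandas pattern in isolation (toy frames):

    import pandas as pd

    then_data = pd.DataFrame({"Id_1": [1, 2, 3]})
    outer_selection = pd.Series([1, 3])  # rows kept by the enclosing branch
    print(then_data[then_data["Id_1"].isin(outer_selection)])  # rows 1 and 3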
@@ -1548,14 +1623,16 @@ class InterpreterAnalyzer(ASTTemplate):
             or self.condition_stack is None
         ):
             return left_operand, right_operand
+
         merge_dataset = (
             self.then_condition_dataset.pop()
             if self.condition_stack.pop() == THEN_ELSE["then"]
             else (self.else_condition_dataset.pop())
         )
+
         merge_index = merge_dataset.data[merge_dataset.get_measures_names()[0]].to_list()
         ids = merge_dataset.get_identifiers_names()
-        if isinstance(left_operand, Dataset):
+        if isinstance(left_operand, (Dataset, DataComponent)):
             if left_operand.data is None:
                 return left_operand, right_operand
             if isinstance(left_operand, Dataset):
@@ -1571,7 +1648,7 @@ class InterpreterAnalyzer(ASTTemplate):
         else:
             left = left_operand.data
             left_operand.data = left.reindex(merge_index, fill_value=None)
-        if isinstance(right_operand, Dataset):
+        if isinstance(right_operand, (Dataset, DataComponent)):
             if right_operand.data is None:
                 return left_operand, right_operand
             if isinstance(right_operand, Dataset):
@@ -1737,9 +1814,7 @@ class InterpreterAnalyzer(ASTTemplate):
                 signature_values[param["name"]] = self.visit(node.params[i])
             elif param["type"] in ["Dataset", "Component"]:
                 if isinstance(node.params[i], AST.VarID):
-                    signature_values[param["name"]] = node.params[
-                        i
-                    ].value  # type: ignore[attr-defined]
+                    signature_values[param["name"]] = node.params[i].value  # type: ignore[attr-defined]
                 else:
                     param_element = self.visit(node.params[i])
                     if isinstance(param_element, Dataset):
@@ -1831,8 +1906,23 @@ class InterpreterAnalyzer(ASTTemplate):
         return result

     def visit_TimeAggregation(self, node: AST.TimeAggregation) -> None:
-
-
+        if node.operand is not None:
+            operand = self.visit(node.operand)
+        else:
+            if self.aggregation_dataset is None:
+                raise SemanticError("1-1-19-11")
+            component_name = Time_Aggregation._get_time_id(self.aggregation_dataset)
+            ast_operand = VarID(
+                value=component_name,
+                line_start=node.line_start,
+                line_stop=node.line_stop,
+                column_start=node.column_start,
+                column_stop=node.column_stop,
+            )
+            operand = self.visit(ast_operand)
         return Time_Aggregation.analyze(
-            operand=operand,
+            operand=operand,
+            period_from=node.period_from,
+            period_to=node.period_to,
+            conf=node.conf,
         )
vtlengine/Model/__init__.py CHANGED
@@ -1,3 +1,4 @@
+import inspect
 import json
 from collections import Counter
 from dataclasses import dataclass
@@ -128,10 +129,14 @@ class Component:
     )

     def to_dict(self) -> Dict[str, Any]:
+        data_type = self.data_type
+        if not inspect.isclass(self.data_type):
+            data_type = self.data_type.__class__  # type: ignore[assignment]
         return {
             "name": self.name,
-            "data_type": DataTypes.SCALAR_TYPES_CLASS_REVERSE[
-
+            "data_type": DataTypes.SCALAR_TYPES_CLASS_REVERSE[data_type],
+            # Need to check here for NoneType as UDO argument has it
+            "role": self.role.value if self.role is not None else None,  # type: ignore[redundant-expr]
             "nullable": self.nullable,
         }
@@ -229,23 +234,21 @@ class Dataset:
                     self.data[comp.name] = self.data[comp.name].astype(str)
                     other.data[comp.name] = other.data[comp.name].astype(str)
                     self.data[comp.name] = self.data[comp.name].map(
-                        lambda x: str(TimePeriodHandler(x)) if x != "" else "",
+                        lambda x: str(TimePeriodHandler(str(x))) if x != "" else "",
+                        na_action="ignore",
                     )
                     other.data[comp.name] = other.data[comp.name].map(
-                        lambda x: str(TimePeriodHandler(x)) if x != "" else "",
+                        lambda x: str(TimePeriodHandler(str(x))) if x != "" else "",
+                        na_action="ignore",
                     )
                 elif type_name in ["Integer", "Number"]:
                     type_ = "int64" if type_name == "Integer" else "float32"
                     # We use here a number to avoid errors on equality on empty strings
                     self.data[comp.name] = (
-                        self.data[comp.name]
-                        .replace("", -1234997)
-                        .astype(type_)  # type: ignore[call-overload]
+                        self.data[comp.name].replace("", -1234997).astype(type_)  # type: ignore[call-overload]
                     )
                     other.data[comp.name] = (
-                        other.data[comp.name]
-                        .replace("", -1234997)
-                        .astype(type_)  # type: ignore[call-overload]
+                        other.data[comp.name].replace("", -1234997).astype(type_)  # type: ignore[call-overload]
                     )
                     try:
                         assert_frame_equal(
@@ -334,7 +337,7 @@ class Dataset:
         return {
             "name": self.name,
             "components": {k: v.to_dict() for k, v in self.components.items()},
-            "data": self.data.to_dict(orient="records") if self.data is not None else None,
+            "data": (self.data.to_dict(orient="records") if self.data is not None else None),
         }

     def to_json(self) -> str:
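`Component.to_dict` now tolerates data types given as instances rather than classes: `SCALAR_TYPES_CLASS_REVERSE` is keyed by classes, so instances are normalized through `__class__` first. The pattern in isolation (the mapping here is an illustrative stand-in, not the library's real one):

    import inspect

    SCALAR_TYPES_CLASS_REVERSE = {int: "Integer", str: "String"}  # stand-in

    def type_key(data_type):
        # Accept a class (int) or an instance ("abc"); the lookup needs the class.
        if not inspect.isclass(data_type):
            data_type = data_type.__class__
        return SCALAR_TYPES_CLASS_REVERSE[data_type]

    print(type_key(int), type_key("abc"))  # Integer String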
|
@@ -29,8 +29,8 @@ from vtlengine.DataTypes import (
|
|
|
29
29
|
unary_implicit_promotion,
|
|
30
30
|
)
|
|
31
31
|
from vtlengine.DataTypes.TimeHandling import (
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
PERIOD_IND_MAPPING,
|
|
33
|
+
PERIOD_IND_MAPPING_REVERSE,
|
|
34
34
|
TimeIntervalHandler,
|
|
35
35
|
TimePeriodHandler,
|
|
36
36
|
)
|
|
@@ -78,7 +78,7 @@ class Aggregation(Operator.Unary):
|
|
|
78
78
|
data[measure.name] = (
|
|
79
79
|
data[measure.name]
|
|
80
80
|
.astype(object)
|
|
81
|
-
.map(lambda x: TimePeriodHandler(x), na_action="ignore")
|
|
81
|
+
.map(lambda x: TimePeriodHandler(str(x)), na_action="ignore")
|
|
82
82
|
)
|
|
83
83
|
else:
|
|
84
84
|
data[measure.name] = data[measure.name].map(
|
|
@@ -89,7 +89,10 @@ class Aggregation(Operator.Unary):
|
|
|
89
89
|
data[measure.name] = (
|
|
90
90
|
data[measure.name]
|
|
91
91
|
.astype(object)
|
|
92
|
-
.map(
|
|
92
|
+
.map(
|
|
93
|
+
lambda x: TimeIntervalHandler.from_iso_format(str(x)),
|
|
94
|
+
na_action="ignore",
|
|
95
|
+
)
|
|
93
96
|
)
|
|
94
97
|
else:
|
|
95
98
|
data[measure.name] = data[measure.name].map(
|
|
@@ -100,11 +103,13 @@ class Aggregation(Operator.Unary):
|
|
|
100
103
|
elif measure.data_type == Duration:
|
|
101
104
|
if mode == "input":
|
|
102
105
|
data[measure.name] = data[measure.name].map(
|
|
103
|
-
lambda x:
|
|
106
|
+
lambda x: PERIOD_IND_MAPPING[x], # type: ignore[index]
|
|
107
|
+
na_action="ignore",
|
|
104
108
|
)
|
|
105
109
|
else:
|
|
106
110
|
data[measure.name] = data[measure.name].map(
|
|
107
|
-
lambda x:
|
|
111
|
+
lambda x: PERIOD_IND_MAPPING_REVERSE[x], # type: ignore[index]
|
|
112
|
+
na_action="ignore",
|
|
108
113
|
)
|
|
109
114
|
elif measure.data_type == Boolean:
|
|
110
115
|
if mode == "result":
|
|
@@ -128,7 +133,10 @@ class Aggregation(Operator.Unary):
|
|
|
128
133
|
for comp_name in grouping_columns:
|
|
129
134
|
if comp_name not in operand.components:
|
|
130
135
|
raise SemanticError(
|
|
131
|
-
"1-1-1-10",
|
|
136
|
+
"1-1-1-10",
|
|
137
|
+
op=cls.op,
|
|
138
|
+
comp_name=comp_name,
|
|
139
|
+
dataset_name=operand.name,
|
|
132
140
|
)
|
|
133
141
|
if operand.components[comp_name].role != Role.IDENTIFIER:
|
|
134
142
|
raise SemanticError(
|
|
@@ -202,7 +210,7 @@ class Aggregation(Operator.Unary):
             e = f'"{e}"'
         if cls.type_to_check is not None and cls.op != COUNT:
             functions += (
-                f"{cls.py_op}(CAST({e} AS
+                f"{cls.py_op}(CAST({e} AS DOUBLE)) AS {e}, "  # Count can only be one here
             )
         elif cls.op == COUNT:
             functions += f"{cls.py_op}({e}) AS int_var, "
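Aggregations are compiled down to a duckdb query, and the CAST to DOUBLE keeps typed aggregates stable when a column arrives as text. A standalone sketch of the generated shape (toy frame; duckdb resolves `data` from Python locals via its replacement scan, and the exact query vtlengine builds may differ):

    import duckdb
    import pandas as pd

    data = pd.DataFrame({"Id_1": [1, 1, 2], "Me_1": ["1.5", "2.5", "3.0"]})
    q = 'SELECT Id_1, sum(CAST("Me_1" AS DOUBLE)) AS "Me_1" FROM data GROUP BY Id_1'
    print(duckdb.query(q).to_df())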
@@ -223,7 +231,7 @@ class Aggregation(Operator.Unary):
             )

         try:
-            return duckdb.query(query).to_df()
+            return duckdb.query(query).to_df().astype(object)
         except RuntimeError as e:
             if "Conversion" in e.args[0]:
                 raise SemanticError("2-3-8", op=cls.op, msg=e.args[0].split(":")[-1])