vtlengine 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +19 -8
- vtlengine/API/__init__.py +9 -9
- vtlengine/AST/ASTConstructor.py +23 -43
- vtlengine/AST/ASTConstructorModules/Expr.py +147 -71
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +104 -40
- vtlengine/AST/ASTConstructorModules/Terminals.py +28 -39
- vtlengine/AST/ASTTemplate.py +16 -1
- vtlengine/AST/DAG/__init__.py +12 -15
- vtlengine/AST/Grammar/Vtl.g4 +49 -20
- vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
- vtlengine/AST/Grammar/lexer.py +1293 -1183
- vtlengine/AST/Grammar/parser.py +5758 -3939
- vtlengine/AST/Grammar/tokens.py +12 -0
- vtlengine/AST/VtlVisitor.py +9 -2
- vtlengine/AST/__init__.py +21 -3
- vtlengine/DataTypes/TimeHandling.py +12 -7
- vtlengine/DataTypes/__init__.py +17 -24
- vtlengine/Exceptions/__init__.py +43 -1
- vtlengine/Exceptions/messages.py +82 -62
- vtlengine/Interpreter/__init__.py +125 -120
- vtlengine/Model/__init__.py +17 -12
- vtlengine/Operators/Aggregation.py +14 -14
- vtlengine/Operators/Analytic.py +56 -31
- vtlengine/Operators/Assignment.py +2 -3
- vtlengine/Operators/Boolean.py +5 -7
- vtlengine/Operators/CastOperator.py +12 -13
- vtlengine/Operators/Clause.py +11 -13
- vtlengine/Operators/Comparison.py +31 -17
- vtlengine/Operators/Conditional.py +157 -17
- vtlengine/Operators/General.py +4 -4
- vtlengine/Operators/HROperators.py +41 -34
- vtlengine/Operators/Join.py +18 -22
- vtlengine/Operators/Numeric.py +76 -39
- vtlengine/Operators/RoleSetter.py +6 -8
- vtlengine/Operators/Set.py +7 -12
- vtlengine/Operators/String.py +19 -27
- vtlengine/Operators/Time.py +366 -43
- vtlengine/Operators/Validation.py +4 -7
- vtlengine/Operators/__init__.py +38 -41
- vtlengine/Utils/__init__.py +149 -94
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +2 -2
- vtlengine/files/output/_time_period_representation.py +0 -1
- vtlengine/files/parser/__init__.py +18 -18
- vtlengine/files/parser/_time_checking.py +3 -2
- {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/METADATA +17 -5
- vtlengine-1.0.3.dist-info/RECORD +58 -0
- vtlengine-1.0.1.dist-info/RECORD +0 -58
- {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/LICENSE.md +0 -0
- {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/WHEEL +0 -0
|
@@ -3,61 +3,35 @@ from dataclasses import dataclass
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import Any, Dict, List, Optional, Union
|
|
5
5
|
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
6
8
|
import vtlengine.AST as AST
|
|
7
9
|
import vtlengine.Exceptions
|
|
8
10
|
import vtlengine.Operators as Operators
|
|
9
|
-
import pandas as pd
|
|
10
|
-
from vtlengine.DataTypes import (
|
|
11
|
-
BASIC_TYPES,
|
|
12
|
-
check_unary_implicit_promotion,
|
|
13
|
-
ScalarType,
|
|
14
|
-
Boolean,
|
|
15
|
-
SCALAR_TYPES_CLASS_REVERSE,
|
|
16
|
-
)
|
|
17
|
-
from vtlengine.Operators.Aggregation import extract_grouping_identifiers
|
|
18
|
-
from vtlengine.Operators.Assignment import Assignment
|
|
19
|
-
from vtlengine.Operators.CastOperator import Cast
|
|
20
|
-
from vtlengine.Operators.Comparison import Between, ExistIn
|
|
21
|
-
from vtlengine.Operators.Conditional import If
|
|
22
|
-
from vtlengine.Operators.General import Eval
|
|
23
|
-
from vtlengine.Operators.HROperators import get_measure_from_dataset, HAAssignment, Hierarchy
|
|
24
|
-
from vtlengine.Operators.Numeric import Round, Trunc
|
|
25
|
-
from vtlengine.Operators.String import Instr, Replace, Substr
|
|
26
|
-
from vtlengine.Operators.Time import Fill_time_series, Time_Aggregation, Current_Date
|
|
27
|
-
from vtlengine.Operators.Validation import Check, Check_Datapoint, Check_Hierarchy
|
|
28
|
-
from vtlengine.Utils import (
|
|
29
|
-
AGGREGATION_MAPPING,
|
|
30
|
-
ANALYTIC_MAPPING,
|
|
31
|
-
BINARY_MAPPING,
|
|
32
|
-
JOIN_MAPPING,
|
|
33
|
-
REGULAR_AGGREGATION_MAPPING,
|
|
34
|
-
ROLE_SETTER_MAPPING,
|
|
35
|
-
SET_MAPPING,
|
|
36
|
-
UNARY_MAPPING,
|
|
37
|
-
THEN_ELSE,
|
|
38
|
-
HR_UNARY_MAPPING,
|
|
39
|
-
HR_COMP_MAPPING,
|
|
40
|
-
HR_NUM_BINARY_MAPPING,
|
|
41
|
-
)
|
|
42
|
-
from vtlengine.files.output import save_datapoints
|
|
43
|
-
from vtlengine.files.output._time_period_representation import TimePeriodRepresentation
|
|
44
|
-
from vtlengine.files.parser import load_datapoints, _fill_dataset_empty_data
|
|
45
|
-
|
|
46
11
|
from vtlengine.AST.ASTTemplate import ASTTemplate
|
|
47
12
|
from vtlengine.AST.DAG import HRDAGAnalyzer
|
|
48
|
-
from vtlengine.AST.DAG._words import
|
|
13
|
+
from vtlengine.AST.DAG._words import DELETE, GLOBAL, INSERT
|
|
49
14
|
from vtlengine.AST.Grammar.tokens import (
|
|
50
15
|
AGGREGATE,
|
|
51
16
|
ALL,
|
|
52
17
|
APPLY,
|
|
53
18
|
AS,
|
|
54
19
|
BETWEEN,
|
|
20
|
+
CALC,
|
|
21
|
+
CAST,
|
|
55
22
|
CHECK_DATAPOINT,
|
|
23
|
+
CHECK_HIERARCHY,
|
|
24
|
+
COUNT,
|
|
25
|
+
CURRENT_DATE,
|
|
26
|
+
DATE_ADD,
|
|
56
27
|
DROP,
|
|
28
|
+
EQ,
|
|
57
29
|
EXISTS_IN,
|
|
58
30
|
EXTERNAL,
|
|
31
|
+
FILL_TIME_SERIES,
|
|
59
32
|
FILTER,
|
|
60
33
|
HAVING,
|
|
34
|
+
HIERARCHY,
|
|
61
35
|
INSTR,
|
|
62
36
|
KEEP,
|
|
63
37
|
MEMBERSHIP,
|
|
@@ -66,25 +40,53 @@ from vtlengine.AST.Grammar.tokens import (
|
|
|
66
40
|
SUBSTR,
|
|
67
41
|
TRUNC,
|
|
68
42
|
WHEN,
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
43
|
+
)
|
|
44
|
+
from vtlengine.DataTypes import (
|
|
45
|
+
BASIC_TYPES,
|
|
46
|
+
SCALAR_TYPES_CLASS_REVERSE,
|
|
47
|
+
Boolean,
|
|
48
|
+
ScalarType,
|
|
49
|
+
check_unary_implicit_promotion,
|
|
76
50
|
)
|
|
77
51
|
from vtlengine.Exceptions import SemanticError
|
|
52
|
+
from vtlengine.files.output import save_datapoints
|
|
53
|
+
from vtlengine.files.output._time_period_representation import TimePeriodRepresentation
|
|
54
|
+
from vtlengine.files.parser import _fill_dataset_empty_data, load_datapoints
|
|
78
55
|
from vtlengine.Model import (
|
|
56
|
+
Component,
|
|
79
57
|
DataComponent,
|
|
80
58
|
Dataset,
|
|
81
59
|
ExternalRoutine,
|
|
82
60
|
Role,
|
|
83
61
|
Scalar,
|
|
84
62
|
ScalarSet,
|
|
85
|
-
Component,
|
|
86
63
|
ValueDomain,
|
|
87
64
|
)
|
|
65
|
+
from vtlengine.Operators.Aggregation import extract_grouping_identifiers
|
|
66
|
+
from vtlengine.Operators.Assignment import Assignment
|
|
67
|
+
from vtlengine.Operators.CastOperator import Cast
|
|
68
|
+
from vtlengine.Operators.Comparison import Between, ExistIn
|
|
69
|
+
from vtlengine.Operators.Conditional import Case, If
|
|
70
|
+
from vtlengine.Operators.General import Eval
|
|
71
|
+
from vtlengine.Operators.HROperators import HAAssignment, Hierarchy, get_measure_from_dataset
|
|
72
|
+
from vtlengine.Operators.Numeric import Round, Trunc
|
|
73
|
+
from vtlengine.Operators.String import Instr, Replace, Substr
|
|
74
|
+
from vtlengine.Operators.Time import Current_Date, Date_Add, Fill_time_series, Time_Aggregation
|
|
75
|
+
from vtlengine.Operators.Validation import Check, Check_Datapoint, Check_Hierarchy
|
|
76
|
+
from vtlengine.Utils import (
|
|
77
|
+
AGGREGATION_MAPPING,
|
|
78
|
+
ANALYTIC_MAPPING,
|
|
79
|
+
BINARY_MAPPING,
|
|
80
|
+
HR_COMP_MAPPING,
|
|
81
|
+
HR_NUM_BINARY_MAPPING,
|
|
82
|
+
HR_UNARY_MAPPING,
|
|
83
|
+
JOIN_MAPPING,
|
|
84
|
+
REGULAR_AGGREGATION_MAPPING,
|
|
85
|
+
ROLE_SETTER_MAPPING,
|
|
86
|
+
SET_MAPPING,
|
|
87
|
+
THEN_ELSE,
|
|
88
|
+
UNARY_MAPPING,
|
|
89
|
+
)
|
|
88
90
|
|
|
89
91
|
|
|
90
92
|
# noinspection PyTypeChecker
|
|
@@ -103,7 +105,7 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
103
105
|
# Time Period Representation
|
|
104
106
|
time_period_representation: Optional[TimePeriodRepresentation] = None
|
|
105
107
|
# Flags to change behavior
|
|
106
|
-
|
|
108
|
+
nested_condition: Union[str, bool] = False
|
|
107
109
|
is_from_assignment: bool = False
|
|
108
110
|
is_from_component_assignment: bool = False
|
|
109
111
|
is_from_regular_aggregation: bool = False
|
|
@@ -115,7 +117,7 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
115
117
|
is_from_condition: bool = False
|
|
116
118
|
is_from_hr_val: bool = False
|
|
117
119
|
is_from_hr_agg: bool = False
|
|
118
|
-
|
|
120
|
+
condition_stack: Optional[List[str]] = None
|
|
119
121
|
# Handlers for simplicity
|
|
120
122
|
regular_aggregation_dataset: Optional[Dataset] = None
|
|
121
123
|
aggregation_grouping: Optional[List[str]] = None
|
|
@@ -199,17 +201,17 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
199
201
|
if isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
|
|
200
202
|
vtlengine.Exceptions.dataset_output = child.left.value # type: ignore[attr-defined]
|
|
201
203
|
self._load_datapoints_efficient(statement_num)
|
|
202
|
-
if not isinstance(child, (AST.HRuleset, AST.DPRuleset, AST.Operator))
|
|
203
|
-
|
|
204
|
-
|
|
204
|
+
if (not isinstance(child, (AST.HRuleset, AST.DPRuleset, AST.Operator)) and
|
|
205
|
+
not isinstance(child, (AST.Assignment, AST.PersistentAssignment))):
|
|
206
|
+
raise SemanticError("1-3-17")
|
|
205
207
|
result = self.visit(child)
|
|
206
208
|
|
|
207
209
|
# Reset some handlers (joins and if)
|
|
208
210
|
self.is_from_join = False
|
|
209
|
-
self.
|
|
211
|
+
self.condition_stack = None
|
|
210
212
|
self.then_condition_dataset = None
|
|
211
213
|
self.else_condition_dataset = None
|
|
212
|
-
self.
|
|
214
|
+
self.nested_condition = False
|
|
213
215
|
|
|
214
216
|
if result is None:
|
|
215
217
|
continue
|
|
@@ -361,20 +363,20 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
361
363
|
if (
|
|
362
364
|
not self.is_from_condition
|
|
363
365
|
and node.op != MEMBERSHIP
|
|
364
|
-
and self.
|
|
365
|
-
and len(self.
|
|
366
|
+
and self.condition_stack is not None
|
|
367
|
+
and len(self.condition_stack) > 0
|
|
366
368
|
):
|
|
367
369
|
is_from_if = self.is_from_if
|
|
368
370
|
self.is_from_if = False
|
|
369
371
|
|
|
370
|
-
if self.is_from_join and node.op in [MEMBERSHIP, AGGREGATE]
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
372
|
+
if (self.is_from_join and node.op in [MEMBERSHIP, AGGREGATE] and
|
|
373
|
+
hasattr(node.left, "value") and hasattr(node.right, "value")):
|
|
374
|
+
if self.udo_params is not None and node.right.value in self.udo_params[-1]:
|
|
375
|
+
comp_name = f"{node.left.value}#{self.udo_params[-1][node.right.value]}"
|
|
376
|
+
else:
|
|
377
|
+
comp_name = f"{node.left.value}#{node.right.value}"
|
|
378
|
+
ast_var_id = AST.VarID(value=comp_name)
|
|
379
|
+
return self.visit(ast_var_id)
|
|
378
380
|
left_operand = self.visit(node.left)
|
|
379
381
|
right_operand = self.visit(node.right)
|
|
380
382
|
if is_from_if:
|
|
@@ -452,10 +454,7 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
452
454
|
grouping_op = node.grouping_op
|
|
453
455
|
if node.grouping is not None:
|
|
454
456
|
if grouping_op == "group all":
|
|
455
|
-
if self.only_semantic
|
|
456
|
-
data = None
|
|
457
|
-
else:
|
|
458
|
-
data = copy(operand.data)
|
|
457
|
+
data = None if self.only_semantic else copy(operand.data)
|
|
459
458
|
self.aggregation_dataset = Dataset(
|
|
460
459
|
name=operand.name, components=operand.components, data=data
|
|
461
460
|
)
|
|
@@ -520,6 +519,7 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
520
519
|
return having
|
|
521
520
|
|
|
522
521
|
def visit_Analytic(self, node: AST.Analytic) -> Any: # noqa: C901
|
|
522
|
+
component_name = None
|
|
523
523
|
if self.is_from_regular_aggregation:
|
|
524
524
|
if self.regular_aggregation_dataset is None:
|
|
525
525
|
raise SemanticError("1-1-6-10")
|
|
@@ -527,6 +527,7 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
527
527
|
operand = self.regular_aggregation_dataset
|
|
528
528
|
else:
|
|
529
529
|
operand_comp = self.visit(node.operand)
|
|
530
|
+
component_name = operand_comp.name
|
|
530
531
|
measure_names = self.regular_aggregation_dataset.get_measures_names()
|
|
531
532
|
dataset_components = self.regular_aggregation_dataset.components.copy()
|
|
532
533
|
for name in measure_names:
|
|
@@ -598,6 +599,7 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
598
599
|
ordering=ordering,
|
|
599
600
|
window=node.window,
|
|
600
601
|
params=params,
|
|
602
|
+
component_name=component_name,
|
|
601
603
|
)
|
|
602
604
|
if not self.is_from_regular_aggregation:
|
|
603
605
|
return result
|
|
@@ -610,7 +612,10 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
610
612
|
)
|
|
611
613
|
|
|
612
614
|
# # Extracting the component we need (only measure)
|
|
613
|
-
|
|
615
|
+
if component_name is None or node.op == COUNT:
|
|
616
|
+
measure_name = result.get_measures_names()[0]
|
|
617
|
+
else:
|
|
618
|
+
measure_name = component_name
|
|
614
619
|
# Joining the result with the original dataset
|
|
615
620
|
if self.only_semantic:
|
|
616
621
|
data = None
|
|
@@ -723,7 +728,7 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
723
728
|
nullable=self.aggregation_dataset.components[node.value].nullable,
|
|
724
729
|
)
|
|
725
730
|
if self.is_from_regular_aggregation:
|
|
726
|
-
if self.is_from_join and node.value in self.datasets
|
|
731
|
+
if self.is_from_join and node.value in self.datasets:
|
|
727
732
|
return self.datasets[node.value]
|
|
728
733
|
if self.regular_aggregation_dataset is not None:
|
|
729
734
|
if node.value in self.datasets and isinstance(self.datasets[node.value], Scalar):
|
|
@@ -739,10 +744,8 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
739
744
|
is_partial_present = 0
|
|
740
745
|
found_comp = None
|
|
741
746
|
for comp_name in self.regular_aggregation_dataset.get_components_names():
|
|
742
|
-
if "#" in comp_name and comp_name.split("#")[1] == node.value
|
|
743
|
-
|
|
744
|
-
found_comp = comp_name
|
|
745
|
-
elif "#" in node.value and node.value.split("#")[1] == comp_name:
|
|
747
|
+
if ("#" in comp_name and comp_name.split("#")[1] == node.value or "#"
|
|
748
|
+
in node.value and node.value.split("#")[1] == comp_name):
|
|
746
749
|
is_partial_present += 1
|
|
747
750
|
found_comp = comp_name
|
|
748
751
|
if is_partial_present == 0:
|
|
@@ -782,10 +785,7 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
782
785
|
raise SemanticError(
|
|
783
786
|
"1-1-1-10", comp_name=node.value, dataset_name=self.ruleset_dataset.name
|
|
784
787
|
)
|
|
785
|
-
if self.rule_data is None
|
|
786
|
-
data = None
|
|
787
|
-
else:
|
|
788
|
-
data = self.rule_data[comp_name]
|
|
788
|
+
data = None if self.rule_data is None else self.rule_data[comp_name]
|
|
789
789
|
return DataComponent(
|
|
790
790
|
name=comp_name,
|
|
791
791
|
data=data,
|
|
@@ -802,10 +802,7 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
802
802
|
elements = []
|
|
803
803
|
duplicates = []
|
|
804
804
|
for child in node.children:
|
|
805
|
-
if isinstance(child, AST.ParamOp)
|
|
806
|
-
ref_element = child.children[1]
|
|
807
|
-
else:
|
|
808
|
-
ref_element = child
|
|
805
|
+
ref_element = child.children[1] if isinstance(child, AST.ParamOp) else child
|
|
809
806
|
if ref_element in elements:
|
|
810
807
|
duplicates.append(ref_element)
|
|
811
808
|
elements.append(self.visit(child).value)
|
|
@@ -842,9 +839,8 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
842
839
|
self.is_from_regular_aggregation = True
|
|
843
840
|
operands.append(self.visit(child))
|
|
844
841
|
self.is_from_regular_aggregation = False
|
|
845
|
-
if node.op == CALC:
|
|
846
|
-
|
|
847
|
-
raise SemanticError("1-3-35", op=node.op)
|
|
842
|
+
if node.op == CALC and any(isinstance(operand, Dataset) for operand in operands):
|
|
843
|
+
raise SemanticError("1-3-35", op=node.op)
|
|
848
844
|
if node.op == AGGREGATE:
|
|
849
845
|
# Extracting the role encoded inside the children assignments
|
|
850
846
|
role_info = {
|
|
@@ -961,36 +957,52 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
961
957
|
|
|
962
958
|
# Analysis for data component and dataset
|
|
963
959
|
else:
|
|
964
|
-
if self.
|
|
965
|
-
self.
|
|
960
|
+
if self.condition_stack is None:
|
|
961
|
+
self.condition_stack = []
|
|
966
962
|
if self.then_condition_dataset is None:
|
|
967
963
|
self.then_condition_dataset = []
|
|
968
964
|
if self.else_condition_dataset is None:
|
|
969
965
|
self.else_condition_dataset = []
|
|
970
966
|
self.generate_then_else_datasets(copy(condition))
|
|
971
967
|
|
|
972
|
-
self.
|
|
968
|
+
self.condition_stack.append(THEN_ELSE["then"])
|
|
973
969
|
self.is_from_if = True
|
|
974
|
-
self.
|
|
970
|
+
self.nested_condition = "T" if isinstance(node.thenOp, AST.If) else False
|
|
975
971
|
thenOp = self.visit(node.thenOp)
|
|
976
972
|
if isinstance(thenOp, Scalar) or not isinstance(node.thenOp, AST.BinOp):
|
|
977
973
|
self.then_condition_dataset.pop()
|
|
978
|
-
self.
|
|
974
|
+
self.condition_stack.pop()
|
|
979
975
|
|
|
980
|
-
self.
|
|
976
|
+
self.condition_stack.append(THEN_ELSE["else"])
|
|
981
977
|
self.is_from_if = True
|
|
982
|
-
self.
|
|
978
|
+
self.nested_condition = "E" if isinstance(node.elseOp, AST.If) else False
|
|
983
979
|
elseOp = self.visit(node.elseOp)
|
|
984
980
|
if isinstance(elseOp, Scalar) or (
|
|
985
981
|
not isinstance(node.elseOp, AST.BinOp) and not isinstance(node.elseOp, AST.If)
|
|
986
982
|
):
|
|
987
983
|
if len(self.else_condition_dataset) > 0:
|
|
988
984
|
self.else_condition_dataset.pop()
|
|
989
|
-
if len(self.
|
|
990
|
-
self.
|
|
985
|
+
if len(self.condition_stack) > 0:
|
|
986
|
+
self.condition_stack.pop()
|
|
991
987
|
|
|
992
988
|
return If.analyze(condition, thenOp, elseOp)
|
|
993
989
|
|
|
990
|
+
def visit_Case(self, node: AST.Case) -> Any:
|
|
991
|
+
conditions: List[Any] = []
|
|
992
|
+
thenOps: List[Any] = []
|
|
993
|
+
|
|
994
|
+
if self.condition_stack is None:
|
|
995
|
+
self.condition_stack = []
|
|
996
|
+
|
|
997
|
+
while node.cases:
|
|
998
|
+
case = node.cases.pop(0)
|
|
999
|
+
self.is_from_condition = True
|
|
1000
|
+
conditions.append(self.visit(case.condition))
|
|
1001
|
+
self.is_from_condition = False
|
|
1002
|
+
thenOps.append(self.visit(case.thenOp))
|
|
1003
|
+
|
|
1004
|
+
return Case.analyze(conditions, thenOps, self.visit(node.elseOp))
|
|
1005
|
+
|
|
994
1006
|
def visit_RenameNode(self, node: AST.RenameNode) -> Any:
|
|
995
1007
|
if self.udo_params is not None:
|
|
996
1008
|
if "#" in node.old_name:
|
|
@@ -1033,11 +1045,7 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
1033
1045
|
def visit_ParamOp(self, node: AST.ParamOp) -> None: # noqa: C901
|
|
1034
1046
|
if node.op == ROUND:
|
|
1035
1047
|
op_element = self.visit(node.children[0])
|
|
1036
|
-
if len(node.params) != 0
|
|
1037
|
-
param_element = self.visit(node.params[0])
|
|
1038
|
-
else:
|
|
1039
|
-
param_element = None
|
|
1040
|
-
|
|
1048
|
+
param_element = self.visit(node.params[0]) if len(node.params) != 0 else None
|
|
1041
1049
|
return Round.analyze(op_element, param_element)
|
|
1042
1050
|
|
|
1043
1051
|
# Numeric Operator
|
|
@@ -1093,6 +1101,9 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
1093
1101
|
elif node.op == FILL_TIME_SERIES:
|
|
1094
1102
|
mode = self.visit(node.params[0]) if len(node.params) == 1 else "all"
|
|
1095
1103
|
return Fill_time_series.analyze(self.visit(node.children[0]), mode)
|
|
1104
|
+
elif node.op == DATE_ADD:
|
|
1105
|
+
params = [self.visit(node.params[0]), self.visit(node.params[1])]
|
|
1106
|
+
return Date_Add.analyze(self.visit(node.children[0]), params)
|
|
1096
1107
|
elif node.op == CAST:
|
|
1097
1108
|
operand = self.visit(node.children[0])
|
|
1098
1109
|
scalar_type = node.children[1]
|
|
@@ -1211,11 +1222,8 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
1211
1222
|
if node.op == HIERARCHY:
|
|
1212
1223
|
aux = []
|
|
1213
1224
|
for rule in hr_info["rules"]:
|
|
1214
|
-
if rule.rule.op == EQ:
|
|
1225
|
+
if rule.rule.op == EQ or rule.rule.op == WHEN and rule.rule.right.op == EQ:
|
|
1215
1226
|
aux.append(rule)
|
|
1216
|
-
elif rule.rule.op == WHEN:
|
|
1217
|
-
if rule.rule.right.op == EQ:
|
|
1218
|
-
aux.append(rule)
|
|
1219
1227
|
# Filter only the rules with HRBinOP as =,
|
|
1220
1228
|
# as they are the ones that will be computed
|
|
1221
1229
|
if len(aux) == 0:
|
|
@@ -1323,9 +1331,11 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
1323
1331
|
if self.rule_data is None:
|
|
1324
1332
|
return None
|
|
1325
1333
|
filtering_indexes = list(filter_comp.data[filter_comp.data == True].index)
|
|
1334
|
+
nan_indexes = list(filter_comp.data[filter_comp.data.isnull()].index)
|
|
1326
1335
|
# If no filtering indexes, then all datapoints are valid on DPR and HR
|
|
1327
1336
|
if len(filtering_indexes) == 0 and not (self.is_from_hr_agg or self.is_from_hr_val):
|
|
1328
1337
|
self.rule_data["bool_var"] = True
|
|
1338
|
+
self.rule_data.loc[nan_indexes, "bool_var"] = None
|
|
1329
1339
|
return self.rule_data
|
|
1330
1340
|
non_filtering_indexes = list(set(filter_comp.data.index) - set(filtering_indexes))
|
|
1331
1341
|
|
|
@@ -1340,6 +1350,7 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
1340
1350
|
self.rule_data, how="left", on=original_data.columns.tolist()
|
|
1341
1351
|
)
|
|
1342
1352
|
original_data.loc[non_filtering_indexes, "bool_var"] = True
|
|
1353
|
+
original_data.loc[nan_indexes, "bool_var"] = None
|
|
1343
1354
|
return original_data
|
|
1344
1355
|
elif node.op in HR_COMP_MAPPING:
|
|
1345
1356
|
self.is_from_assignment = True
|
|
@@ -1385,12 +1396,10 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
1385
1396
|
left_operand.data = pd.DataFrame({measure_name: []})
|
|
1386
1397
|
if right_operand.data is None:
|
|
1387
1398
|
right_operand.data = pd.DataFrame({measure_name: []})
|
|
1388
|
-
left_null_indexes = set(
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
list(right_operand.data[right_operand.data[measure_name].isnull()].index)
|
|
1393
|
-
)
|
|
1399
|
+
left_null_indexes = set(left_operand.data[left_operand.data[
|
|
1400
|
+
measure_name].isnull()].index)
|
|
1401
|
+
right_null_indexes = set(right_operand.data[right_operand.data[
|
|
1402
|
+
measure_name].isnull()].index)
|
|
1394
1403
|
# If no indexes are in common, then one datapoint is not null
|
|
1395
1404
|
invalid_indexes = list(left_null_indexes.intersection(right_null_indexes))
|
|
1396
1405
|
if len(invalid_indexes) > 0:
|
|
@@ -1478,22 +1487,18 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
1478
1487
|
if condition.data_type != BASIC_TYPES[bool]:
|
|
1479
1488
|
raise ValueError("Only boolean scalars are allowed on data component condition")
|
|
1480
1489
|
name = condition.name
|
|
1481
|
-
if condition.data is None
|
|
1482
|
-
data = None
|
|
1483
|
-
else:
|
|
1484
|
-
data = condition.data
|
|
1490
|
+
data = None if condition.data is None else condition.data
|
|
1485
1491
|
|
|
1486
1492
|
if data is not None:
|
|
1487
|
-
if self.
|
|
1493
|
+
if self.nested_condition and self.condition_stack is not None:
|
|
1488
1494
|
merge_df = (
|
|
1489
1495
|
self.then_condition_dataset[-1]
|
|
1490
|
-
if self.
|
|
1496
|
+
if self.condition_stack[-1] == THEN_ELSE["then"]
|
|
1491
1497
|
else self.else_condition_dataset[-1]
|
|
1492
1498
|
)
|
|
1493
1499
|
indexes = merge_df.data[merge_df.data.columns[-1]]
|
|
1494
1500
|
else:
|
|
1495
|
-
indexes = data.index
|
|
1496
|
-
data = data.fillna(False)
|
|
1501
|
+
indexes = data[data.notnull()].index
|
|
1497
1502
|
|
|
1498
1503
|
if isinstance(condition, Dataset):
|
|
1499
1504
|
filtered_data = data.iloc[indexes]
|
|
@@ -1540,12 +1545,12 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
1540
1545
|
if (
|
|
1541
1546
|
self.then_condition_dataset is None
|
|
1542
1547
|
or self.else_condition_dataset is None
|
|
1543
|
-
or self.
|
|
1548
|
+
or self.condition_stack is None
|
|
1544
1549
|
):
|
|
1545
1550
|
return left_operand, right_operand
|
|
1546
1551
|
merge_dataset = (
|
|
1547
1552
|
self.then_condition_dataset.pop()
|
|
1548
|
-
if self.
|
|
1553
|
+
if self.condition_stack.pop() == THEN_ELSE["then"]
|
|
1549
1554
|
else (self.else_condition_dataset.pop())
|
|
1550
1555
|
)
|
|
1551
1556
|
merge_index = merge_dataset.data[merge_dataset.get_measures_names()[0]].to_list()
|
|
@@ -1617,8 +1622,8 @@ class InterpreterAnalyzer(ASTTemplate):
|
|
|
1617
1622
|
|
|
1618
1623
|
# Getting Dataset elements
|
|
1619
1624
|
result_components = {
|
|
1620
|
-
|
|
1621
|
-
for
|
|
1625
|
+
comp_name: copy(comp)
|
|
1626
|
+
for comp_name, comp in self.ruleset_dataset.components.items() # type: ignore[union-attr]
|
|
1622
1627
|
}
|
|
1623
1628
|
if self.ruleset_signature is not None:
|
|
1624
1629
|
hr_component = self.ruleset_signature["RULE_COMPONENT"]
|
vtlengine/Model/__init__.py
CHANGED
|
@@ -2,17 +2,18 @@ import json
|
|
|
2
2
|
from collections import Counter
|
|
3
3
|
from dataclasses import dataclass
|
|
4
4
|
from enum import Enum
|
|
5
|
-
from typing import Dict, List, Optional,
|
|
5
|
+
from typing import Any, Dict, List, Optional, Type, Union
|
|
6
6
|
|
|
7
|
-
import vtlengine.DataTypes as DataTypes
|
|
8
7
|
import pandas as pd
|
|
9
8
|
import sqlglot
|
|
10
9
|
import sqlglot.expressions as exp
|
|
11
|
-
from vtlengine.DataTypes import SCALAR_TYPES, ScalarType
|
|
12
|
-
from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
|
|
13
10
|
from pandas import DataFrame as PandasDataFrame
|
|
14
11
|
from pandas._testing import assert_frame_equal
|
|
15
12
|
|
|
13
|
+
import vtlengine.DataTypes as DataTypes
|
|
14
|
+
from vtlengine.DataTypes import SCALAR_TYPES, ScalarType
|
|
15
|
+
from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
|
|
16
|
+
from vtlengine.Exceptions import SemanticError
|
|
16
17
|
|
|
17
18
|
# from pyspark.pandas import DataFrame as SparkDataFrame, Series as SparkSeries
|
|
18
19
|
|
|
@@ -41,6 +42,13 @@ class Scalar:
|
|
|
41
42
|
return same_name and same_type and same_value
|
|
42
43
|
|
|
43
44
|
|
|
45
|
+
Role_keys = [
|
|
46
|
+
"Identifier",
|
|
47
|
+
"Attribute",
|
|
48
|
+
"Measure",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
|
|
44
52
|
class Role(Enum):
|
|
45
53
|
"""
|
|
46
54
|
Enum class for the role of a component (Identifier, Attribute, Measure)
|
|
@@ -152,7 +160,7 @@ class Dataset:
|
|
|
152
160
|
raise ValueError(
|
|
153
161
|
"The number of components must match the number of columns in the data"
|
|
154
162
|
)
|
|
155
|
-
for name,
|
|
163
|
+
for name, _ in self.components.items():
|
|
156
164
|
if name not in self.data.columns:
|
|
157
165
|
raise ValueError(f"Component {name} not found in the data")
|
|
158
166
|
|
|
@@ -202,8 +210,8 @@ class Dataset:
|
|
|
202
210
|
return True
|
|
203
211
|
elif self.data is None or other.data is None:
|
|
204
212
|
return False
|
|
205
|
-
if len(self.data) == len(other.data) == 0:
|
|
206
|
-
|
|
213
|
+
if len(self.data) == len(other.data) == 0 and self.data.shape != other.data.shape:
|
|
214
|
+
raise SemanticError("0-1-1-14", dataset1=self.name, dataset2=other.name)
|
|
207
215
|
|
|
208
216
|
self.data.fillna("", inplace=True)
|
|
209
217
|
other.data.fillna("", inplace=True)
|
|
@@ -227,11 +235,8 @@ class Dataset:
|
|
|
227
235
|
lambda x: str(TimePeriodHandler(x)) if x != "" else "", na_action="ignore"
|
|
228
236
|
)
|
|
229
237
|
elif type_name in ["Integer", "Number"]:
|
|
230
|
-
if type_name == "Integer"
|
|
231
|
-
|
|
232
|
-
else:
|
|
233
|
-
type_ = "float32"
|
|
234
|
-
# We use here a number to avoid errors on equality on empty strings
|
|
238
|
+
type_ = "int64" if type_name == "Integer" else "float32"
|
|
239
|
+
# We use here a number to avoid errors on equality on empty strings
|
|
235
240
|
self.data[comp.name] = (
|
|
236
241
|
self.data[comp.name]
|
|
237
242
|
.replace("", -1234997)
|
|
@@ -1,19 +1,8 @@
|
|
|
1
1
|
from copy import copy
|
|
2
|
-
from typing import List, Optional
|
|
2
|
+
from typing import Any, List, Optional
|
|
3
3
|
|
|
4
4
|
import duckdb
|
|
5
5
|
import pandas as pd
|
|
6
|
-
from vtlengine.DataTypes import (
|
|
7
|
-
Integer,
|
|
8
|
-
Number,
|
|
9
|
-
unary_implicit_promotion,
|
|
10
|
-
Boolean,
|
|
11
|
-
String,
|
|
12
|
-
Duration,
|
|
13
|
-
TimeInterval,
|
|
14
|
-
TimePeriod,
|
|
15
|
-
Date,
|
|
16
|
-
)
|
|
17
6
|
|
|
18
7
|
import vtlengine.Operators as Operator
|
|
19
8
|
from vtlengine.AST.Grammar.tokens import (
|
|
@@ -28,11 +17,22 @@ from vtlengine.AST.Grammar.tokens import (
|
|
|
28
17
|
VAR_POP,
|
|
29
18
|
VAR_SAMP,
|
|
30
19
|
)
|
|
20
|
+
from vtlengine.DataTypes import (
|
|
21
|
+
Boolean,
|
|
22
|
+
Date,
|
|
23
|
+
Duration,
|
|
24
|
+
Integer,
|
|
25
|
+
Number,
|
|
26
|
+
String,
|
|
27
|
+
TimeInterval,
|
|
28
|
+
TimePeriod,
|
|
29
|
+
unary_implicit_promotion,
|
|
30
|
+
)
|
|
31
31
|
from vtlengine.DataTypes.TimeHandling import (
|
|
32
32
|
DURATION_MAPPING,
|
|
33
33
|
DURATION_MAPPING_REVERSED,
|
|
34
|
-
TimePeriodHandler,
|
|
35
34
|
TimeIntervalHandler,
|
|
35
|
+
TimePeriodHandler,
|
|
36
36
|
)
|
|
37
37
|
from vtlengine.Exceptions import SemanticError
|
|
38
38
|
from vtlengine.Model import Component, Dataset, Role
|
|
@@ -153,7 +153,7 @@ class Aggregation(Operator.Unary):
|
|
|
153
153
|
if comp.role == Role.ATTRIBUTE:
|
|
154
154
|
del result_components[comp_name]
|
|
155
155
|
# Change Measure data type
|
|
156
|
-
for
|
|
156
|
+
for _, comp in result_components.items():
|
|
157
157
|
if comp.role == Role.MEASURE:
|
|
158
158
|
unary_implicit_promotion(comp.data_type, cls.type_to_check)
|
|
159
159
|
if cls.return_type is not None:
|