vtlengine 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vtlengine might be problematic; consult the package registry's advisory page for more details.

Files changed (50):
  1. vtlengine/API/_InternalApi.py +19 -8
  2. vtlengine/API/__init__.py +9 -9
  3. vtlengine/AST/ASTConstructor.py +23 -43
  4. vtlengine/AST/ASTConstructorModules/Expr.py +147 -71
  5. vtlengine/AST/ASTConstructorModules/ExprComponents.py +104 -40
  6. vtlengine/AST/ASTConstructorModules/Terminals.py +28 -39
  7. vtlengine/AST/ASTTemplate.py +16 -1
  8. vtlengine/AST/DAG/__init__.py +12 -15
  9. vtlengine/AST/Grammar/Vtl.g4 +49 -20
  10. vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
  11. vtlengine/AST/Grammar/lexer.py +1293 -1183
  12. vtlengine/AST/Grammar/parser.py +5758 -3939
  13. vtlengine/AST/Grammar/tokens.py +12 -0
  14. vtlengine/AST/VtlVisitor.py +9 -2
  15. vtlengine/AST/__init__.py +21 -3
  16. vtlengine/DataTypes/TimeHandling.py +12 -7
  17. vtlengine/DataTypes/__init__.py +17 -24
  18. vtlengine/Exceptions/__init__.py +43 -1
  19. vtlengine/Exceptions/messages.py +82 -62
  20. vtlengine/Interpreter/__init__.py +125 -120
  21. vtlengine/Model/__init__.py +17 -12
  22. vtlengine/Operators/Aggregation.py +14 -14
  23. vtlengine/Operators/Analytic.py +56 -31
  24. vtlengine/Operators/Assignment.py +2 -3
  25. vtlengine/Operators/Boolean.py +5 -7
  26. vtlengine/Operators/CastOperator.py +12 -13
  27. vtlengine/Operators/Clause.py +11 -13
  28. vtlengine/Operators/Comparison.py +31 -17
  29. vtlengine/Operators/Conditional.py +157 -17
  30. vtlengine/Operators/General.py +4 -4
  31. vtlengine/Operators/HROperators.py +41 -34
  32. vtlengine/Operators/Join.py +18 -22
  33. vtlengine/Operators/Numeric.py +76 -39
  34. vtlengine/Operators/RoleSetter.py +6 -8
  35. vtlengine/Operators/Set.py +7 -12
  36. vtlengine/Operators/String.py +19 -27
  37. vtlengine/Operators/Time.py +366 -43
  38. vtlengine/Operators/Validation.py +4 -7
  39. vtlengine/Operators/__init__.py +38 -41
  40. vtlengine/Utils/__init__.py +149 -94
  41. vtlengine/__init__.py +1 -1
  42. vtlengine/files/output/__init__.py +2 -2
  43. vtlengine/files/output/_time_period_representation.py +0 -1
  44. vtlengine/files/parser/__init__.py +18 -18
  45. vtlengine/files/parser/_time_checking.py +3 -2
  46. {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/METADATA +17 -5
  47. vtlengine-1.0.3.dist-info/RECORD +58 -0
  48. vtlengine-1.0.1.dist-info/RECORD +0 -58
  49. {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/LICENSE.md +0 -0
  50. {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/WHEEL +0 -0
@@ -3,61 +3,35 @@ from dataclasses import dataclass
3
3
  from pathlib import Path
4
4
  from typing import Any, Dict, List, Optional, Union
5
5
 
6
+ import pandas as pd
7
+
6
8
  import vtlengine.AST as AST
7
9
  import vtlengine.Exceptions
8
10
  import vtlengine.Operators as Operators
9
- import pandas as pd
10
- from vtlengine.DataTypes import (
11
- BASIC_TYPES,
12
- check_unary_implicit_promotion,
13
- ScalarType,
14
- Boolean,
15
- SCALAR_TYPES_CLASS_REVERSE,
16
- )
17
- from vtlengine.Operators.Aggregation import extract_grouping_identifiers
18
- from vtlengine.Operators.Assignment import Assignment
19
- from vtlengine.Operators.CastOperator import Cast
20
- from vtlengine.Operators.Comparison import Between, ExistIn
21
- from vtlengine.Operators.Conditional import If
22
- from vtlengine.Operators.General import Eval
23
- from vtlengine.Operators.HROperators import get_measure_from_dataset, HAAssignment, Hierarchy
24
- from vtlengine.Operators.Numeric import Round, Trunc
25
- from vtlengine.Operators.String import Instr, Replace, Substr
26
- from vtlengine.Operators.Time import Fill_time_series, Time_Aggregation, Current_Date
27
- from vtlengine.Operators.Validation import Check, Check_Datapoint, Check_Hierarchy
28
- from vtlengine.Utils import (
29
- AGGREGATION_MAPPING,
30
- ANALYTIC_MAPPING,
31
- BINARY_MAPPING,
32
- JOIN_MAPPING,
33
- REGULAR_AGGREGATION_MAPPING,
34
- ROLE_SETTER_MAPPING,
35
- SET_MAPPING,
36
- UNARY_MAPPING,
37
- THEN_ELSE,
38
- HR_UNARY_MAPPING,
39
- HR_COMP_MAPPING,
40
- HR_NUM_BINARY_MAPPING,
41
- )
42
- from vtlengine.files.output import save_datapoints
43
- from vtlengine.files.output._time_period_representation import TimePeriodRepresentation
44
- from vtlengine.files.parser import load_datapoints, _fill_dataset_empty_data
45
-
46
11
  from vtlengine.AST.ASTTemplate import ASTTemplate
47
12
  from vtlengine.AST.DAG import HRDAGAnalyzer
48
- from vtlengine.AST.DAG._words import GLOBAL, DELETE, INSERT
13
+ from vtlengine.AST.DAG._words import DELETE, GLOBAL, INSERT
49
14
  from vtlengine.AST.Grammar.tokens import (
50
15
  AGGREGATE,
51
16
  ALL,
52
17
  APPLY,
53
18
  AS,
54
19
  BETWEEN,
20
+ CALC,
21
+ CAST,
55
22
  CHECK_DATAPOINT,
23
+ CHECK_HIERARCHY,
24
+ COUNT,
25
+ CURRENT_DATE,
26
+ DATE_ADD,
56
27
  DROP,
28
+ EQ,
57
29
  EXISTS_IN,
58
30
  EXTERNAL,
31
+ FILL_TIME_SERIES,
59
32
  FILTER,
60
33
  HAVING,
34
+ HIERARCHY,
61
35
  INSTR,
62
36
  KEEP,
63
37
  MEMBERSHIP,
@@ -66,25 +40,53 @@ from vtlengine.AST.Grammar.tokens import (
66
40
  SUBSTR,
67
41
  TRUNC,
68
42
  WHEN,
69
- FILL_TIME_SERIES,
70
- CAST,
71
- CHECK_HIERARCHY,
72
- HIERARCHY,
73
- EQ,
74
- CURRENT_DATE,
75
- CALC,
43
+ )
44
+ from vtlengine.DataTypes import (
45
+ BASIC_TYPES,
46
+ SCALAR_TYPES_CLASS_REVERSE,
47
+ Boolean,
48
+ ScalarType,
49
+ check_unary_implicit_promotion,
76
50
  )
77
51
  from vtlengine.Exceptions import SemanticError
52
+ from vtlengine.files.output import save_datapoints
53
+ from vtlengine.files.output._time_period_representation import TimePeriodRepresentation
54
+ from vtlengine.files.parser import _fill_dataset_empty_data, load_datapoints
78
55
  from vtlengine.Model import (
56
+ Component,
79
57
  DataComponent,
80
58
  Dataset,
81
59
  ExternalRoutine,
82
60
  Role,
83
61
  Scalar,
84
62
  ScalarSet,
85
- Component,
86
63
  ValueDomain,
87
64
  )
65
+ from vtlengine.Operators.Aggregation import extract_grouping_identifiers
66
+ from vtlengine.Operators.Assignment import Assignment
67
+ from vtlengine.Operators.CastOperator import Cast
68
+ from vtlengine.Operators.Comparison import Between, ExistIn
69
+ from vtlengine.Operators.Conditional import Case, If
70
+ from vtlengine.Operators.General import Eval
71
+ from vtlengine.Operators.HROperators import HAAssignment, Hierarchy, get_measure_from_dataset
72
+ from vtlengine.Operators.Numeric import Round, Trunc
73
+ from vtlengine.Operators.String import Instr, Replace, Substr
74
+ from vtlengine.Operators.Time import Current_Date, Date_Add, Fill_time_series, Time_Aggregation
75
+ from vtlengine.Operators.Validation import Check, Check_Datapoint, Check_Hierarchy
76
+ from vtlengine.Utils import (
77
+ AGGREGATION_MAPPING,
78
+ ANALYTIC_MAPPING,
79
+ BINARY_MAPPING,
80
+ HR_COMP_MAPPING,
81
+ HR_NUM_BINARY_MAPPING,
82
+ HR_UNARY_MAPPING,
83
+ JOIN_MAPPING,
84
+ REGULAR_AGGREGATION_MAPPING,
85
+ ROLE_SETTER_MAPPING,
86
+ SET_MAPPING,
87
+ THEN_ELSE,
88
+ UNARY_MAPPING,
89
+ )
88
90
 
89
91
 
90
92
  # noinspection PyTypeChecker
@@ -103,7 +105,7 @@ class InterpreterAnalyzer(ASTTemplate):
103
105
  # Time Period Representation
104
106
  time_period_representation: Optional[TimePeriodRepresentation] = None
105
107
  # Flags to change behavior
106
- nested_if: Union[str, bool] = False
108
+ nested_condition: Union[str, bool] = False
107
109
  is_from_assignment: bool = False
108
110
  is_from_component_assignment: bool = False
109
111
  is_from_regular_aggregation: bool = False
@@ -115,7 +117,7 @@ class InterpreterAnalyzer(ASTTemplate):
115
117
  is_from_condition: bool = False
116
118
  is_from_hr_val: bool = False
117
119
  is_from_hr_agg: bool = False
118
- if_stack: Optional[List[str]] = None
120
+ condition_stack: Optional[List[str]] = None
119
121
  # Handlers for simplicity
120
122
  regular_aggregation_dataset: Optional[Dataset] = None
121
123
  aggregation_grouping: Optional[List[str]] = None
@@ -199,17 +201,17 @@ class InterpreterAnalyzer(ASTTemplate):
199
201
  if isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
200
202
  vtlengine.Exceptions.dataset_output = child.left.value # type: ignore[attr-defined]
201
203
  self._load_datapoints_efficient(statement_num)
202
- if not isinstance(child, (AST.HRuleset, AST.DPRuleset, AST.Operator)):
203
- if not isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
204
- raise SemanticError("1-3-17")
204
+ if (not isinstance(child, (AST.HRuleset, AST.DPRuleset, AST.Operator)) and
205
+ not isinstance(child, (AST.Assignment, AST.PersistentAssignment))):
206
+ raise SemanticError("1-3-17")
205
207
  result = self.visit(child)
206
208
 
207
209
  # Reset some handlers (joins and if)
208
210
  self.is_from_join = False
209
- self.if_stack = None
211
+ self.condition_stack = None
210
212
  self.then_condition_dataset = None
211
213
  self.else_condition_dataset = None
212
- self.nested_if = False
214
+ self.nested_condition = False
213
215
 
214
216
  if result is None:
215
217
  continue
@@ -361,20 +363,20 @@ class InterpreterAnalyzer(ASTTemplate):
361
363
  if (
362
364
  not self.is_from_condition
363
365
  and node.op != MEMBERSHIP
364
- and self.if_stack is not None
365
- and len(self.if_stack) > 0
366
+ and self.condition_stack is not None
367
+ and len(self.condition_stack) > 0
366
368
  ):
367
369
  is_from_if = self.is_from_if
368
370
  self.is_from_if = False
369
371
 
370
- if self.is_from_join and node.op in [MEMBERSHIP, AGGREGATE]:
371
- if hasattr(node.left, "value") and hasattr(node.right, "value"):
372
- if self.udo_params is not None and node.right.value in self.udo_params[-1]:
373
- comp_name = f"{node.left.value}#{self.udo_params[-1][node.right.value]}"
374
- else:
375
- comp_name = f"{node.left.value}#{node.right.value}"
376
- ast_var_id = AST.VarID(value=comp_name)
377
- return self.visit(ast_var_id)
372
+ if (self.is_from_join and node.op in [MEMBERSHIP, AGGREGATE] and
373
+ hasattr(node.left, "value") and hasattr(node.right, "value")):
374
+ if self.udo_params is not None and node.right.value in self.udo_params[-1]:
375
+ comp_name = f"{node.left.value}#{self.udo_params[-1][node.right.value]}"
376
+ else:
377
+ comp_name = f"{node.left.value}#{node.right.value}"
378
+ ast_var_id = AST.VarID(value=comp_name)
379
+ return self.visit(ast_var_id)
378
380
  left_operand = self.visit(node.left)
379
381
  right_operand = self.visit(node.right)
380
382
  if is_from_if:
@@ -452,10 +454,7 @@ class InterpreterAnalyzer(ASTTemplate):
452
454
  grouping_op = node.grouping_op
453
455
  if node.grouping is not None:
454
456
  if grouping_op == "group all":
455
- if self.only_semantic:
456
- data = None
457
- else:
458
- data = copy(operand.data)
457
+ data = None if self.only_semantic else copy(operand.data)
459
458
  self.aggregation_dataset = Dataset(
460
459
  name=operand.name, components=operand.components, data=data
461
460
  )
@@ -520,6 +519,7 @@ class InterpreterAnalyzer(ASTTemplate):
520
519
  return having
521
520
 
522
521
  def visit_Analytic(self, node: AST.Analytic) -> Any: # noqa: C901
522
+ component_name = None
523
523
  if self.is_from_regular_aggregation:
524
524
  if self.regular_aggregation_dataset is None:
525
525
  raise SemanticError("1-1-6-10")
@@ -527,6 +527,7 @@ class InterpreterAnalyzer(ASTTemplate):
527
527
  operand = self.regular_aggregation_dataset
528
528
  else:
529
529
  operand_comp = self.visit(node.operand)
530
+ component_name = operand_comp.name
530
531
  measure_names = self.regular_aggregation_dataset.get_measures_names()
531
532
  dataset_components = self.regular_aggregation_dataset.components.copy()
532
533
  for name in measure_names:
@@ -598,6 +599,7 @@ class InterpreterAnalyzer(ASTTemplate):
598
599
  ordering=ordering,
599
600
  window=node.window,
600
601
  params=params,
602
+ component_name=component_name,
601
603
  )
602
604
  if not self.is_from_regular_aggregation:
603
605
  return result
@@ -610,7 +612,10 @@ class InterpreterAnalyzer(ASTTemplate):
610
612
  )
611
613
 
612
614
  # # Extracting the component we need (only measure)
613
- measure_name = result.get_measures_names()[0]
615
+ if component_name is None or node.op == COUNT:
616
+ measure_name = result.get_measures_names()[0]
617
+ else:
618
+ measure_name = component_name
614
619
  # Joining the result with the original dataset
615
620
  if self.only_semantic:
616
621
  data = None
@@ -723,7 +728,7 @@ class InterpreterAnalyzer(ASTTemplate):
723
728
  nullable=self.aggregation_dataset.components[node.value].nullable,
724
729
  )
725
730
  if self.is_from_regular_aggregation:
726
- if self.is_from_join and node.value in self.datasets.keys():
731
+ if self.is_from_join and node.value in self.datasets:
727
732
  return self.datasets[node.value]
728
733
  if self.regular_aggregation_dataset is not None:
729
734
  if node.value in self.datasets and isinstance(self.datasets[node.value], Scalar):
@@ -739,10 +744,8 @@ class InterpreterAnalyzer(ASTTemplate):
739
744
  is_partial_present = 0
740
745
  found_comp = None
741
746
  for comp_name in self.regular_aggregation_dataset.get_components_names():
742
- if "#" in comp_name and comp_name.split("#")[1] == node.value:
743
- is_partial_present += 1
744
- found_comp = comp_name
745
- elif "#" in node.value and node.value.split("#")[1] == comp_name:
747
+ if ("#" in comp_name and comp_name.split("#")[1] == node.value or "#"
748
+ in node.value and node.value.split("#")[1] == comp_name):
746
749
  is_partial_present += 1
747
750
  found_comp = comp_name
748
751
  if is_partial_present == 0:
@@ -782,10 +785,7 @@ class InterpreterAnalyzer(ASTTemplate):
782
785
  raise SemanticError(
783
786
  "1-1-1-10", comp_name=node.value, dataset_name=self.ruleset_dataset.name
784
787
  )
785
- if self.rule_data is None:
786
- data = None
787
- else:
788
- data = self.rule_data[comp_name]
788
+ data = None if self.rule_data is None else self.rule_data[comp_name]
789
789
  return DataComponent(
790
790
  name=comp_name,
791
791
  data=data,
@@ -802,10 +802,7 @@ class InterpreterAnalyzer(ASTTemplate):
802
802
  elements = []
803
803
  duplicates = []
804
804
  for child in node.children:
805
- if isinstance(child, AST.ParamOp):
806
- ref_element = child.children[1]
807
- else:
808
- ref_element = child
805
+ ref_element = child.children[1] if isinstance(child, AST.ParamOp) else child
809
806
  if ref_element in elements:
810
807
  duplicates.append(ref_element)
811
808
  elements.append(self.visit(child).value)
@@ -842,9 +839,8 @@ class InterpreterAnalyzer(ASTTemplate):
842
839
  self.is_from_regular_aggregation = True
843
840
  operands.append(self.visit(child))
844
841
  self.is_from_regular_aggregation = False
845
- if node.op == CALC:
846
- if any([isinstance(operand, Dataset) for operand in operands]):
847
- raise SemanticError("1-3-35", op=node.op)
842
+ if node.op == CALC and any(isinstance(operand, Dataset) for operand in operands):
843
+ raise SemanticError("1-3-35", op=node.op)
848
844
  if node.op == AGGREGATE:
849
845
  # Extracting the role encoded inside the children assignments
850
846
  role_info = {
@@ -961,36 +957,52 @@ class InterpreterAnalyzer(ASTTemplate):
961
957
 
962
958
  # Analysis for data component and dataset
963
959
  else:
964
- if self.if_stack is None:
965
- self.if_stack = []
960
+ if self.condition_stack is None:
961
+ self.condition_stack = []
966
962
  if self.then_condition_dataset is None:
967
963
  self.then_condition_dataset = []
968
964
  if self.else_condition_dataset is None:
969
965
  self.else_condition_dataset = []
970
966
  self.generate_then_else_datasets(copy(condition))
971
967
 
972
- self.if_stack.append(THEN_ELSE["then"])
968
+ self.condition_stack.append(THEN_ELSE["then"])
973
969
  self.is_from_if = True
974
- self.nested_if = "T" if isinstance(node.thenOp, AST.If) else False
970
+ self.nested_condition = "T" if isinstance(node.thenOp, AST.If) else False
975
971
  thenOp = self.visit(node.thenOp)
976
972
  if isinstance(thenOp, Scalar) or not isinstance(node.thenOp, AST.BinOp):
977
973
  self.then_condition_dataset.pop()
978
- self.if_stack.pop()
974
+ self.condition_stack.pop()
979
975
 
980
- self.if_stack.append(THEN_ELSE["else"])
976
+ self.condition_stack.append(THEN_ELSE["else"])
981
977
  self.is_from_if = True
982
- self.nested_if = "E" if isinstance(node.elseOp, AST.If) else False
978
+ self.nested_condition = "E" if isinstance(node.elseOp, AST.If) else False
983
979
  elseOp = self.visit(node.elseOp)
984
980
  if isinstance(elseOp, Scalar) or (
985
981
  not isinstance(node.elseOp, AST.BinOp) and not isinstance(node.elseOp, AST.If)
986
982
  ):
987
983
  if len(self.else_condition_dataset) > 0:
988
984
  self.else_condition_dataset.pop()
989
- if len(self.if_stack) > 0:
990
- self.if_stack.pop()
985
+ if len(self.condition_stack) > 0:
986
+ self.condition_stack.pop()
991
987
 
992
988
  return If.analyze(condition, thenOp, elseOp)
993
989
 
990
+ def visit_Case(self, node: AST.Case) -> Any:
991
+ conditions: List[Any] = []
992
+ thenOps: List[Any] = []
993
+
994
+ if self.condition_stack is None:
995
+ self.condition_stack = []
996
+
997
+ while node.cases:
998
+ case = node.cases.pop(0)
999
+ self.is_from_condition = True
1000
+ conditions.append(self.visit(case.condition))
1001
+ self.is_from_condition = False
1002
+ thenOps.append(self.visit(case.thenOp))
1003
+
1004
+ return Case.analyze(conditions, thenOps, self.visit(node.elseOp))
1005
+
994
1006
  def visit_RenameNode(self, node: AST.RenameNode) -> Any:
995
1007
  if self.udo_params is not None:
996
1008
  if "#" in node.old_name:
@@ -1033,11 +1045,7 @@ class InterpreterAnalyzer(ASTTemplate):
1033
1045
  def visit_ParamOp(self, node: AST.ParamOp) -> None: # noqa: C901
1034
1046
  if node.op == ROUND:
1035
1047
  op_element = self.visit(node.children[0])
1036
- if len(node.params) != 0:
1037
- param_element = self.visit(node.params[0])
1038
- else:
1039
- param_element = None
1040
-
1048
+ param_element = self.visit(node.params[0]) if len(node.params) != 0 else None
1041
1049
  return Round.analyze(op_element, param_element)
1042
1050
 
1043
1051
  # Numeric Operator
@@ -1093,6 +1101,9 @@ class InterpreterAnalyzer(ASTTemplate):
1093
1101
  elif node.op == FILL_TIME_SERIES:
1094
1102
  mode = self.visit(node.params[0]) if len(node.params) == 1 else "all"
1095
1103
  return Fill_time_series.analyze(self.visit(node.children[0]), mode)
1104
+ elif node.op == DATE_ADD:
1105
+ params = [self.visit(node.params[0]), self.visit(node.params[1])]
1106
+ return Date_Add.analyze(self.visit(node.children[0]), params)
1096
1107
  elif node.op == CAST:
1097
1108
  operand = self.visit(node.children[0])
1098
1109
  scalar_type = node.children[1]
@@ -1211,11 +1222,8 @@ class InterpreterAnalyzer(ASTTemplate):
1211
1222
  if node.op == HIERARCHY:
1212
1223
  aux = []
1213
1224
  for rule in hr_info["rules"]:
1214
- if rule.rule.op == EQ:
1225
+ if rule.rule.op == EQ or rule.rule.op == WHEN and rule.rule.right.op == EQ:
1215
1226
  aux.append(rule)
1216
- elif rule.rule.op == WHEN:
1217
- if rule.rule.right.op == EQ:
1218
- aux.append(rule)
1219
1227
  # Filter only the rules with HRBinOP as =,
1220
1228
  # as they are the ones that will be computed
1221
1229
  if len(aux) == 0:
@@ -1323,9 +1331,11 @@ class InterpreterAnalyzer(ASTTemplate):
1323
1331
  if self.rule_data is None:
1324
1332
  return None
1325
1333
  filtering_indexes = list(filter_comp.data[filter_comp.data == True].index)
1334
+ nan_indexes = list(filter_comp.data[filter_comp.data.isnull()].index)
1326
1335
  # If no filtering indexes, then all datapoints are valid on DPR and HR
1327
1336
  if len(filtering_indexes) == 0 and not (self.is_from_hr_agg or self.is_from_hr_val):
1328
1337
  self.rule_data["bool_var"] = True
1338
+ self.rule_data.loc[nan_indexes, "bool_var"] = None
1329
1339
  return self.rule_data
1330
1340
  non_filtering_indexes = list(set(filter_comp.data.index) - set(filtering_indexes))
1331
1341
 
@@ -1340,6 +1350,7 @@ class InterpreterAnalyzer(ASTTemplate):
1340
1350
  self.rule_data, how="left", on=original_data.columns.tolist()
1341
1351
  )
1342
1352
  original_data.loc[non_filtering_indexes, "bool_var"] = True
1353
+ original_data.loc[nan_indexes, "bool_var"] = None
1343
1354
  return original_data
1344
1355
  elif node.op in HR_COMP_MAPPING:
1345
1356
  self.is_from_assignment = True
@@ -1385,12 +1396,10 @@ class InterpreterAnalyzer(ASTTemplate):
1385
1396
  left_operand.data = pd.DataFrame({measure_name: []})
1386
1397
  if right_operand.data is None:
1387
1398
  right_operand.data = pd.DataFrame({measure_name: []})
1388
- left_null_indexes = set(
1389
- list(left_operand.data[left_operand.data[measure_name].isnull()].index)
1390
- )
1391
- right_null_indexes = set(
1392
- list(right_operand.data[right_operand.data[measure_name].isnull()].index)
1393
- )
1399
+ left_null_indexes = set(left_operand.data[left_operand.data[
1400
+ measure_name].isnull()].index)
1401
+ right_null_indexes = set(right_operand.data[right_operand.data[
1402
+ measure_name].isnull()].index)
1394
1403
  # If no indexes are in common, then one datapoint is not null
1395
1404
  invalid_indexes = list(left_null_indexes.intersection(right_null_indexes))
1396
1405
  if len(invalid_indexes) > 0:
@@ -1478,22 +1487,18 @@ class InterpreterAnalyzer(ASTTemplate):
1478
1487
  if condition.data_type != BASIC_TYPES[bool]:
1479
1488
  raise ValueError("Only boolean scalars are allowed on data component condition")
1480
1489
  name = condition.name
1481
- if condition.data is None:
1482
- data = None
1483
- else:
1484
- data = condition.data
1490
+ data = None if condition.data is None else condition.data
1485
1491
 
1486
1492
  if data is not None:
1487
- if self.nested_if and self.if_stack is not None:
1493
+ if self.nested_condition and self.condition_stack is not None:
1488
1494
  merge_df = (
1489
1495
  self.then_condition_dataset[-1]
1490
- if self.if_stack[-1] == THEN_ELSE["then"]
1496
+ if self.condition_stack[-1] == THEN_ELSE["then"]
1491
1497
  else self.else_condition_dataset[-1]
1492
1498
  )
1493
1499
  indexes = merge_df.data[merge_df.data.columns[-1]]
1494
1500
  else:
1495
- indexes = data.index
1496
- data = data.fillna(False)
1501
+ indexes = data[data.notnull()].index
1497
1502
 
1498
1503
  if isinstance(condition, Dataset):
1499
1504
  filtered_data = data.iloc[indexes]
@@ -1540,12 +1545,12 @@ class InterpreterAnalyzer(ASTTemplate):
1540
1545
  if (
1541
1546
  self.then_condition_dataset is None
1542
1547
  or self.else_condition_dataset is None
1543
- or self.if_stack is None
1548
+ or self.condition_stack is None
1544
1549
  ):
1545
1550
  return left_operand, right_operand
1546
1551
  merge_dataset = (
1547
1552
  self.then_condition_dataset.pop()
1548
- if self.if_stack.pop() == THEN_ELSE["then"]
1553
+ if self.condition_stack.pop() == THEN_ELSE["then"]
1549
1554
  else (self.else_condition_dataset.pop())
1550
1555
  )
1551
1556
  merge_index = merge_dataset.data[merge_dataset.get_measures_names()[0]].to_list()
@@ -1617,8 +1622,8 @@ class InterpreterAnalyzer(ASTTemplate):
1617
1622
 
1618
1623
  # Getting Dataset elements
1619
1624
  result_components = {
1620
- c_name: copy(comp)
1621
- for c_name, comp in self.ruleset_dataset.components.items() # type: ignore[union-attr]
1625
+ comp_name: copy(comp)
1626
+ for comp_name, comp in self.ruleset_dataset.components.items() # type: ignore[union-attr]
1622
1627
  }
1623
1628
  if self.ruleset_signature is not None:
1624
1629
  hr_component = self.ruleset_signature["RULE_COMPONENT"]
@@ -2,17 +2,18 @@ import json
2
2
  from collections import Counter
3
3
  from dataclasses import dataclass
4
4
  from enum import Enum
5
- from typing import Dict, List, Optional, Union, Any, Type
5
+ from typing import Any, Dict, List, Optional, Type, Union
6
6
 
7
- import vtlengine.DataTypes as DataTypes
8
7
  import pandas as pd
9
8
  import sqlglot
10
9
  import sqlglot.expressions as exp
11
- from vtlengine.DataTypes import SCALAR_TYPES, ScalarType
12
- from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
13
10
  from pandas import DataFrame as PandasDataFrame
14
11
  from pandas._testing import assert_frame_equal
15
12
 
13
+ import vtlengine.DataTypes as DataTypes
14
+ from vtlengine.DataTypes import SCALAR_TYPES, ScalarType
15
+ from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
16
+ from vtlengine.Exceptions import SemanticError
16
17
 
17
18
  # from pyspark.pandas import DataFrame as SparkDataFrame, Series as SparkSeries
18
19
 
@@ -41,6 +42,13 @@ class Scalar:
41
42
  return same_name and same_type and same_value
42
43
 
43
44
 
45
+ Role_keys = [
46
+ "Identifier",
47
+ "Attribute",
48
+ "Measure",
49
+ ]
50
+
51
+
44
52
  class Role(Enum):
45
53
  """
46
54
  Enum class for the role of a component (Identifier, Attribute, Measure)
@@ -152,7 +160,7 @@ class Dataset:
152
160
  raise ValueError(
153
161
  "The number of components must match the number of columns in the data"
154
162
  )
155
- for name, component in self.components.items():
163
+ for name, _ in self.components.items():
156
164
  if name not in self.data.columns:
157
165
  raise ValueError(f"Component {name} not found in the data")
158
166
 
@@ -202,8 +210,8 @@ class Dataset:
202
210
  return True
203
211
  elif self.data is None or other.data is None:
204
212
  return False
205
- if len(self.data) == len(other.data) == 0:
206
- assert self.data.shape == other.data.shape
213
+ if len(self.data) == len(other.data) == 0 and self.data.shape != other.data.shape:
214
+ raise SemanticError("0-1-1-14", dataset1=self.name, dataset2=other.name)
207
215
 
208
216
  self.data.fillna("", inplace=True)
209
217
  other.data.fillna("", inplace=True)
@@ -227,11 +235,8 @@ class Dataset:
227
235
  lambda x: str(TimePeriodHandler(x)) if x != "" else "", na_action="ignore"
228
236
  )
229
237
  elif type_name in ["Integer", "Number"]:
230
- if type_name == "Integer":
231
- type_ = "int64"
232
- else:
233
- type_ = "float32"
234
- # We use here a number to avoid errors on equality on empty strings
238
+ type_ = "int64" if type_name == "Integer" else "float32"
239
+ # We use here a number to avoid errors on equality on empty strings
235
240
  self.data[comp.name] = (
236
241
  self.data[comp.name]
237
242
  .replace("", -1234997)
@@ -1,19 +1,8 @@
1
1
  from copy import copy
2
- from typing import List, Optional, Any
2
+ from typing import Any, List, Optional
3
3
 
4
4
  import duckdb
5
5
  import pandas as pd
6
- from vtlengine.DataTypes import (
7
- Integer,
8
- Number,
9
- unary_implicit_promotion,
10
- Boolean,
11
- String,
12
- Duration,
13
- TimeInterval,
14
- TimePeriod,
15
- Date,
16
- )
17
6
 
18
7
  import vtlengine.Operators as Operator
19
8
  from vtlengine.AST.Grammar.tokens import (
@@ -28,11 +17,22 @@ from vtlengine.AST.Grammar.tokens import (
28
17
  VAR_POP,
29
18
  VAR_SAMP,
30
19
  )
20
+ from vtlengine.DataTypes import (
21
+ Boolean,
22
+ Date,
23
+ Duration,
24
+ Integer,
25
+ Number,
26
+ String,
27
+ TimeInterval,
28
+ TimePeriod,
29
+ unary_implicit_promotion,
30
+ )
31
31
  from vtlengine.DataTypes.TimeHandling import (
32
32
  DURATION_MAPPING,
33
33
  DURATION_MAPPING_REVERSED,
34
- TimePeriodHandler,
35
34
  TimeIntervalHandler,
35
+ TimePeriodHandler,
36
36
  )
37
37
  from vtlengine.Exceptions import SemanticError
38
38
  from vtlengine.Model import Component, Dataset, Role
@@ -153,7 +153,7 @@ class Aggregation(Operator.Unary):
153
153
  if comp.role == Role.ATTRIBUTE:
154
154
  del result_components[comp_name]
155
155
  # Change Measure data type
156
- for comp_name, comp in result_components.items():
156
+ for _, comp in result_components.items():
157
157
  if comp.role == Role.MEASURE:
158
158
  unary_implicit_promotion(comp.data_type, cls.type_to_check)
159
159
  if cls.return_type is not None: