vtlengine 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vtlengine might be problematic. Click here for more details.

Files changed (50)
  1. vtlengine/API/_InternalApi.py +19 -8
  2. vtlengine/API/__init__.py +9 -9
  3. vtlengine/AST/ASTConstructor.py +23 -43
  4. vtlengine/AST/ASTConstructorModules/Expr.py +147 -71
  5. vtlengine/AST/ASTConstructorModules/ExprComponents.py +104 -40
  6. vtlengine/AST/ASTConstructorModules/Terminals.py +28 -39
  7. vtlengine/AST/ASTTemplate.py +16 -1
  8. vtlengine/AST/DAG/__init__.py +12 -15
  9. vtlengine/AST/Grammar/Vtl.g4 +49 -20
  10. vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
  11. vtlengine/AST/Grammar/lexer.py +1293 -1183
  12. vtlengine/AST/Grammar/parser.py +5758 -3939
  13. vtlengine/AST/Grammar/tokens.py +12 -0
  14. vtlengine/AST/VtlVisitor.py +9 -2
  15. vtlengine/AST/__init__.py +21 -3
  16. vtlengine/DataTypes/TimeHandling.py +12 -7
  17. vtlengine/DataTypes/__init__.py +17 -24
  18. vtlengine/Exceptions/__init__.py +43 -1
  19. vtlengine/Exceptions/messages.py +82 -62
  20. vtlengine/Interpreter/__init__.py +125 -120
  21. vtlengine/Model/__init__.py +17 -12
  22. vtlengine/Operators/Aggregation.py +14 -14
  23. vtlengine/Operators/Analytic.py +56 -31
  24. vtlengine/Operators/Assignment.py +2 -3
  25. vtlengine/Operators/Boolean.py +5 -7
  26. vtlengine/Operators/CastOperator.py +12 -13
  27. vtlengine/Operators/Clause.py +11 -13
  28. vtlengine/Operators/Comparison.py +31 -17
  29. vtlengine/Operators/Conditional.py +157 -17
  30. vtlengine/Operators/General.py +4 -4
  31. vtlengine/Operators/HROperators.py +41 -34
  32. vtlengine/Operators/Join.py +18 -22
  33. vtlengine/Operators/Numeric.py +76 -39
  34. vtlengine/Operators/RoleSetter.py +6 -8
  35. vtlengine/Operators/Set.py +7 -12
  36. vtlengine/Operators/String.py +19 -27
  37. vtlengine/Operators/Time.py +366 -43
  38. vtlengine/Operators/Validation.py +4 -7
  39. vtlengine/Operators/__init__.py +38 -41
  40. vtlengine/Utils/__init__.py +149 -94
  41. vtlengine/__init__.py +1 -1
  42. vtlengine/files/output/__init__.py +2 -2
  43. vtlengine/files/output/_time_period_representation.py +0 -1
  44. vtlengine/files/parser/__init__.py +18 -18
  45. vtlengine/files/parser/_time_checking.py +3 -2
  46. {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/METADATA +17 -5
  47. vtlengine-1.0.3.dist-info/RECORD +58 -0
  48. vtlengine-1.0.1.dist-info/RECORD +0 -58
  49. {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/LICENSE.md +0 -0
  50. {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/WHEEL +0 -0
@@ -3,8 +3,6 @@ from typing import List, Optional
3
3
 
4
4
  import duckdb
5
5
 
6
- from vtlengine.Exceptions import SemanticError
7
-
8
6
  # if os.environ.get("SPARK"):
9
7
  # import pyspark.pandas as pd
10
8
  # else:
@@ -32,6 +30,7 @@ from vtlengine.AST.Grammar.tokens import (
32
30
  VAR_SAMP,
33
31
  )
34
32
  from vtlengine.DataTypes import COMP_NAME_MAPPING, Integer, Number, unary_implicit_promotion
33
+ from vtlengine.Exceptions import SemanticError
35
34
  from vtlengine.Model import Component, Dataset, Role
36
35
 
37
36
 
@@ -58,11 +57,9 @@ class Analytic(Operator.Unary):
58
57
  ordering: Optional[List[OrderBy]],
59
58
  window: Optional[Windowing],
60
59
  params: Optional[List[int]],
60
+ component_name: Optional[str] = None,
61
61
  ) -> Dataset:
62
- if ordering is None:
63
- order_components = []
64
- else:
65
- order_components = [o.component for o in ordering]
62
+ order_components = [] if ordering is None else [o.component for o in ordering]
66
63
  identifier_names = operand.get_identifiers_names()
67
64
  result_components = operand.components.copy()
68
65
 
@@ -83,25 +80,51 @@ class Analytic(Operator.Unary):
83
80
  raise SemanticError(
84
81
  "1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=operand.name
85
82
  )
86
- measures = operand.get_measures()
87
- if measures is None:
88
- raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
89
- if cls.type_to_check is not None:
90
- for measure in measures:
91
- unary_implicit_promotion(measure.data_type, cls.type_to_check)
92
- if cls.return_type is not None:
93
- for measure in measures:
94
- new_measure = copy(measure)
95
- new_measure.data_type = cls.return_type
96
- result_components[measure.name] = new_measure
97
- if cls.op == COUNT and len(measures) <= 1:
98
- measure_name = COMP_NAME_MAPPING[cls.return_type]
99
- nullable = False if len(measures) == 0 else measures[0].nullable
100
- if len(measures) == 1:
101
- del result_components[measures[0].name]
102
- result_components[measure_name] = Component(
103
- name=measure_name, data_type=cls.return_type, role=Role.MEASURE, nullable=nullable
104
- )
83
+ if component_name is not None:
84
+ if cls.type_to_check is not None:
85
+ unary_implicit_promotion(
86
+ operand.components[component_name].data_type, cls.type_to_check
87
+ )
88
+ if cls.return_type is not None:
89
+ result_components[component_name] = Component(
90
+ name=component_name,
91
+ data_type=cls.return_type,
92
+ role=operand.components[component_name].role,
93
+ nullable=operand.components[component_name].nullable,
94
+ )
95
+ if cls.op == COUNT:
96
+ measure_name = COMP_NAME_MAPPING[cls.return_type]
97
+ result_components[measure_name] = Component(
98
+ name=measure_name,
99
+ data_type=cls.return_type,
100
+ role=Role.MEASURE,
101
+ nullable=operand.components[component_name].nullable,
102
+ )
103
+ if component_name in result_components:
104
+ del result_components[component_name]
105
+ else:
106
+ measures = operand.get_measures()
107
+ if len(measures) == 0:
108
+ raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
109
+ if cls.type_to_check is not None:
110
+ for measure in measures:
111
+ unary_implicit_promotion(measure.data_type, cls.type_to_check)
112
+ if cls.return_type is not None:
113
+ for measure in measures:
114
+ new_measure = copy(measure)
115
+ new_measure.data_type = cls.return_type
116
+ result_components[measure.name] = new_measure
117
+ if cls.op == COUNT and len(measures) <= 1:
118
+ measure_name = COMP_NAME_MAPPING[cls.return_type]
119
+ nullable = False if len(measures) == 0 else measures[0].nullable
120
+ if len(measures) == 1:
121
+ del result_components[measures[0].name]
122
+ result_components[measure_name] = Component(
123
+ name=measure_name,
124
+ data_type=cls.return_type,
125
+ role=Role.MEASURE,
126
+ nullable=nullable,
127
+ )
105
128
 
106
129
  return Dataset(name="result", components=result_components, data=None)
107
130
 
@@ -151,10 +174,7 @@ class Analytic(Operator.Unary):
151
174
  window_str = f"{mode} BETWEEN {window.start} {start_mode} AND {window.stop} {stop_mode}"
152
175
 
153
176
  # Partitioning
154
- if len(partitioning) > 0:
155
- partition = "PARTITION BY " + ", ".join(partitioning)
156
- else:
157
- partition = ""
177
+ partition = "PARTITION BY " + ", ".join(partitioning) if len(partitioning) > 0 else ""
158
178
 
159
179
  # Ordering
160
180
  order_str = ""
@@ -205,12 +225,17 @@ class Analytic(Operator.Unary):
205
225
  ordering: Optional[List[OrderBy]],
206
226
  window: Optional[Windowing],
207
227
  params: Optional[List[int]],
228
+ component_name: Optional[str] = None,
208
229
  ) -> Dataset:
209
- result = cls.validate(operand, partitioning, ordering, window, params)
230
+ result = cls.validate(operand, partitioning, ordering, window, params, component_name)
210
231
  df = operand.data.copy() if operand.data is not None else pd.DataFrame()
211
- measure_names = operand.get_measures_names()
212
232
  identifier_names = operand.get_identifiers_names()
213
233
 
234
+ if component_name is not None:
235
+ measure_names = [component_name]
236
+ else:
237
+ measure_names = operand.get_measures_names()
238
+
214
239
  result.data = cls.analyticfunc(
215
240
  df=df,
216
241
  partitioning=partitioning,
@@ -1,9 +1,8 @@
1
- from typing import Union, Any
2
-
3
- from vtlengine.Operators import Binary
1
+ from typing import Any, Union
4
2
 
5
3
  from vtlengine.Exceptions import SemanticError
6
4
  from vtlengine.Model import DataComponent, Dataset
5
+ from vtlengine.Operators import Binary
7
6
 
8
7
  ALL_MODEL_TYPES = Union[DataComponent, Dataset]
9
8
 
@@ -2,13 +2,13 @@
2
2
  # import pyspark.pandas as pd
3
3
  # else:
4
4
  # import pandas as pd
5
- import pandas as pd
5
+ from typing import Any, Optional
6
6
 
7
- from typing import Optional, Any
7
+ import pandas as pd
8
8
 
9
- from vtlengine.AST.Grammar.tokens import AND, OR, XOR, NOT
10
- from vtlengine.DataTypes import Boolean
11
9
  import vtlengine.Operators as Operator
10
+ from vtlengine.AST.Grammar.tokens import AND, NOT, OR, XOR
11
+ from vtlengine.DataTypes import Boolean
12
12
 
13
13
 
14
14
  class Unary(Operator.Unary):
@@ -30,9 +30,7 @@ class Binary(Operator.Binary):
30
30
 
31
31
  @classmethod
32
32
  def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
33
- result = cls.comp_op(
34
- left_series.astype("bool[pyarrow]"), right_series.astype("bool[pyarrow]")
35
- )
33
+ result = cls.comp_op(left_series.astype("boolean"), right_series.astype("boolean"))
36
34
  return result.replace({pd.NA: None}).astype(object)
37
35
 
38
36
  @classmethod
@@ -1,27 +1,27 @@
1
1
  from copy import copy
2
- from typing import Optional, Any, Union, Type
2
+ from typing import Any, Optional, Type, Union
3
3
 
4
- import vtlengine.Operators as Operator
5
4
  import pandas as pd
5
+
6
+ import vtlengine.Operators as Operator
7
+ from vtlengine.AST.Grammar.tokens import CAST
6
8
  from vtlengine.DataTypes import (
7
9
  COMP_NAME_MAPPING,
8
10
  EXPLICIT_WITH_MASK_TYPE_PROMOTION_MAPPING,
9
11
  EXPLICIT_WITHOUT_MASK_TYPE_PROMOTION_MAPPING,
10
12
  IMPLICIT_TYPE_PROMOTION_MAPPING,
11
- String,
12
- Number,
13
- TimeInterval,
13
+ SCALAR_TYPES_CLASS_REVERSE,
14
14
  Date,
15
- TimePeriod,
16
15
  Duration,
17
- SCALAR_TYPES_CLASS_REVERSE,
16
+ Number,
18
17
  ScalarType,
18
+ String,
19
+ TimeInterval,
20
+ TimePeriod,
19
21
  )
20
22
  from vtlengine.DataTypes.TimeHandling import str_period_to_date
21
-
22
- from vtlengine.AST.Grammar.tokens import CAST
23
23
  from vtlengine.Exceptions import SemanticError
24
- from vtlengine.Model import Component, Dataset, Role, Scalar, DataComponent
24
+ from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
25
25
 
26
26
  duration_mapping = {"A": 6, "S": 5, "Q": 4, "M": 3, "W": 2, "D": 1}
27
27
 
@@ -286,9 +286,8 @@ class Cast(Operator.Unary):
286
286
  mask: Optional[str] = None,
287
287
  ) -> Any:
288
288
 
289
- if mask is not None:
290
- if not isinstance(mask, str):
291
- raise Exception(f"{cls.op} mask must be a string")
289
+ if mask is not None and not isinstance(mask, str):
290
+ raise Exception(f"{cls.op} mask must be a string")
292
291
 
293
292
  if isinstance(operand, Dataset):
294
293
  return cls.dataset_validation(operand, scalarType, mask)
@@ -1,21 +1,20 @@
1
- import pandas as pd
2
-
3
1
  from copy import copy
4
- from typing import List, Union, Type
2
+ from typing import List, Type, Union
3
+
4
+ import pandas as pd
5
5
 
6
+ from vtlengine.AST import RenameNode
7
+ from vtlengine.AST.Grammar.tokens import AGGREGATE, CALC, DROP, KEEP, RENAME, SUBSPACE
6
8
  from vtlengine.DataTypes import (
7
9
  Boolean,
10
+ ScalarType,
8
11
  String,
9
12
  check_unary_implicit_promotion,
10
13
  unary_implicit_promotion,
11
- ScalarType,
12
14
  )
13
- from vtlengine.Operators import Operator
14
-
15
- from vtlengine.AST import RenameNode
16
- from vtlengine.AST.Grammar.tokens import KEEP, DROP, RENAME, SUBSPACE, CALC, AGGREGATE
17
15
  from vtlengine.Exceptions import SemanticError
18
16
  from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
17
+ from vtlengine.Operators import Operator
19
18
 
20
19
 
21
20
  class Calc(Operator):
@@ -162,9 +161,8 @@ class Keep(Operator):
162
161
  def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
163
162
  if len(operands) == 0:
164
163
  raise ValueError("Keep clause requires at least one operand")
165
- if dataset is None:
166
- if sum(isinstance(operand, Dataset) for operand in operands) != 1:
167
- raise ValueError("Keep clause requires at most one dataset operand")
164
+ if dataset is None and sum(isinstance(operand, Dataset) for operand in operands) != 1:
165
+ raise ValueError("Keep clause requires at most one dataset operand")
168
166
  result_dataset = cls.validate(operands, dataset)
169
167
  if dataset.data is not None:
170
168
  result_dataset.data = dataset.data[dataset.get_identifiers_names() + operands]
@@ -212,11 +210,11 @@ class Rename(Operator):
212
210
  raise SemanticError("1-3-1", alias=duplicates)
213
211
 
214
212
  for operand in operands:
215
- if operand.old_name not in dataset.components.keys():
213
+ if operand.old_name not in dataset.components:
216
214
  raise SemanticError(
217
215
  "1-1-1-10", op=cls.op, comp_name=operand.old_name, dataset_name=dataset.name
218
216
  )
219
- if operand.new_name in dataset.components.keys():
217
+ if operand.new_name in dataset.components:
220
218
  raise SemanticError(
221
219
  "1-1-6-8", op=cls.op, comp_name=operand.new_name, dataset_name=dataset.name
222
220
  )
@@ -3,15 +3,13 @@ import re
3
3
  from copy import copy
4
4
  from typing import Any, Optional, Union
5
5
 
6
- from vtlengine.Exceptions import SemanticError
7
- from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
8
-
9
6
  # if os.environ.get("SPARK"):
10
7
  # import pyspark.pandas as pd
11
8
  # else:
12
9
  # import pandas as pd
13
10
  import pandas as pd
14
11
 
12
+ import vtlengine.Operators as Operator
15
13
  from vtlengine.AST.Grammar.tokens import (
16
14
  CHARSET_MATCH,
17
15
  EQ,
@@ -24,8 +22,9 @@ from vtlengine.AST.Grammar.tokens import (
24
22
  NEQ,
25
23
  NOT_IN,
26
24
  )
27
- from vtlengine.DataTypes import Boolean, COMP_NAME_MAPPING, String, Number, Null
28
- import vtlengine.Operators as Operator
25
+ from vtlengine.DataTypes import COMP_NAME_MAPPING, Boolean, Null, Number, String
26
+ from vtlengine.Exceptions import SemanticError
27
+ from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
29
28
 
30
29
 
31
30
  class Unary(Operator.Unary):
@@ -75,11 +74,11 @@ class Binary(Operator.Binary):
75
74
  return_type = Boolean
76
75
 
77
76
  @classmethod
78
- def _cast_values(
79
- cls, x: Optional[Union[int, float, str, bool]], y: Optional[Union[int, float, str, bool]]
80
- ) -> Any:
81
- # Cast both values to the same data type
82
- # An integer can be considered a bool, we must check first boolean, then numbers
77
+ def _cast_values(cls,
78
+ x: Optional[Union[int, float, str, bool]],
79
+ y: Optional[Union[int, float, str, bool]]
80
+ ) -> Any:
81
+ # Cast values to compatible types for comparison
83
82
  try:
84
83
  if isinstance(x, str) and isinstance(y, bool):
85
84
  y = String.cast(y)
@@ -97,6 +96,7 @@ class Binary(Operator.Binary):
97
96
 
98
97
  @classmethod
99
98
  def op_func(cls, x: Any, y: Any) -> Any:
99
+ # Return None if any of the values are NaN
100
100
  if pd.isnull(x) or pd.isnull(y):
101
101
  return None
102
102
  x, y = cls._cast_values(x, y)
@@ -104,12 +104,29 @@ class Binary(Operator.Binary):
104
104
 
105
105
  @classmethod
106
106
  def apply_operation_series_scalar(cls, series: Any, scalar: Any, series_left: bool) -> Any:
107
- if scalar is None:
107
+ if pd.isnull(scalar):
108
108
  return pd.Series(None, index=series.index)
109
+
110
+ first_non_null = series.dropna().iloc[0] if not series.dropna().empty else None
111
+ if first_non_null is not None:
112
+ scalar, first_non_null = cls._cast_values(scalar, first_non_null)
113
+
114
+ series_type = pd.api.types.infer_dtype(series, skipna=True)
115
+ first_non_null_type = pd.api.types.infer_dtype([first_non_null])
116
+
117
+ if series_type != first_non_null_type:
118
+ if isinstance(first_non_null, str):
119
+ series = series.astype(str)
120
+ elif isinstance(first_non_null, (int, float)):
121
+ series = series.astype(float)
122
+
123
+ op = cls.py_op if cls.py_op is not None else cls.op_func
109
124
  if series_left:
110
- return series.map(lambda x: cls.op_func(x, scalar), na_action="ignore")
125
+ result = series.map(lambda x: op(x, scalar), na_action="ignore")
111
126
  else:
112
- return series.map(lambda x: cls.op_func(scalar, x), na_action="ignore")
127
+ result = series.map(lambda x: op(scalar, x), na_action="ignore")
128
+
129
+ return result
113
130
 
114
131
  @classmethod
115
132
  def apply_return_type_dataset(
@@ -408,10 +425,7 @@ class ExistIn(Operator.Operator):
408
425
  reference_identifiers_names = left_id_names
409
426
 
410
427
  # Checking if the left dataset is a subset of the right dataset
411
- if is_subset_left:
412
- common_columns = left_id_names
413
- else:
414
- common_columns = right_id_names
428
+ common_columns = left_id_names if is_subset_left else right_id_names
415
429
 
416
430
  # Check if the common identifiers are equal between the two datasets
417
431
  if dataset_1.data is not None and dataset_2.data is not None:
@@ -1,31 +1,30 @@
1
1
  from copy import copy
2
- from typing import Union, Any
2
+ from typing import Any, List, Union
3
3
 
4
4
  import numpy as np
5
5
 
6
+ # if os.environ.get("SPARK", False):
7
+ # import pyspark.pandas as pd
8
+ # else:
9
+ # import pandas as pd
10
+ import pandas as pd
11
+
6
12
  from vtlengine.DataTypes import (
7
- Boolean,
8
13
  COMP_NAME_MAPPING,
9
- binary_implicit_promotion,
10
14
  SCALAR_TYPES_CLASS_REVERSE,
15
+ Boolean,
11
16
  Null,
17
+ binary_implicit_promotion,
12
18
  )
13
- from vtlengine.Operators import Operator, Binary
14
-
15
19
  from vtlengine.Exceptions import SemanticError
16
- from vtlengine.Model import Scalar, DataComponent, Dataset, Role
17
-
18
- # if os.environ.get("SPARK", False):
19
- # import pyspark.pandas as pd
20
- # else:
21
- # import pandas as pd
22
- import pandas as pd
20
+ from vtlengine.Model import DataComponent, Dataset, Role, Scalar
21
+ from vtlengine.Operators import Binary, Operator
23
22
 
24
23
 
25
24
  class If(Operator):
26
25
  """
27
26
  If class:
28
- `If-then-else <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=225&zoom=100,72,142>`_ operator # noqa E501
27
+ `If-then-else <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=225&zoom=100,72,142>`_ operator
29
28
  inherits from Operator, a superclass that contains general validate and evaluate class methods.
30
29
  It has the following class methods:
31
30
  Class methods:
@@ -40,7 +39,7 @@ class If(Operator):
40
39
  validate: Class method that has two branches so datacomponent and datasets can be validated. With datacomponent,
41
40
  the code reviews if it is actually a Measure and if it is a binary operation. Dataset branch reviews if the
42
41
  identifiers are the same in 'if', 'then' and 'else'.
43
- """
42
+ """ # noqa E501
44
43
 
45
44
  @classmethod
46
45
  def evaluate(cls, condition: Any, true_branch: Any, false_branch: Any) -> Any:
@@ -108,7 +107,7 @@ class If(Operator):
108
107
  )
109
108
  if isinstance(result, Dataset):
110
109
  drop_columns = [
111
- column for column in result.data.columns if column not in result.components.keys()
110
+ column for column in result.data.columns if column not in result.components
112
111
  ]
113
112
  result.data = result.data.dropna(subset=drop_columns).drop(columns=drop_columns)
114
113
  if isinstance(true_branch, Scalar) and isinstance(false_branch, Scalar):
@@ -213,14 +212,14 @@ class If(Operator):
213
212
  class Nvl(Binary):
214
213
  """
215
214
  Null class:
216
- `Nvl <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=229&zoom=100,72,370>`_operator class. # noqa E501
215
+ `Nvl <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=229&zoom=100,72,370>`_operator class.
217
216
  It has the following class methods:
218
217
 
219
218
  Class methods:
220
219
  Validate: Class method that validates if the operation at scalar,
221
220
  datacomponent or dataset level can be performed.
222
221
  Evaluate: Evaluates the actual operation, returning the result.
223
- """
222
+ """ # noqa E501
224
223
 
225
224
  @classmethod
226
225
  def evaluate(cls, left: Any, right: Any) -> Union[Scalar, DataComponent, Dataset]:
@@ -288,3 +287,144 @@ class Nvl(Binary):
288
287
  for comp in result_components.values():
289
288
  comp.nullable = False
290
289
  return Dataset(name="result", components=result_components, data=None)
290
+
291
+
292
+ class Case(Operator):
293
+
294
+ @classmethod
295
+ def evaluate(
296
+ cls, conditions: List[Any], thenOps: List[Any], elseOp: Any
297
+ ) -> Union[Scalar, DataComponent, Dataset]:
298
+
299
+ result = cls.validate(conditions, thenOps, elseOp)
300
+
301
+ if isinstance(result, Scalar):
302
+ result.value = elseOp.value
303
+ for i in range(len(conditions)):
304
+ if conditions[i].value:
305
+ result.value = thenOps[i].value
306
+
307
+ if isinstance(result, DataComponent):
308
+ result.data = pd.Series(None, index=conditions[0].data.index)
309
+
310
+ for i, condition in enumerate(conditions):
311
+ value = thenOps[i].value if isinstance(thenOps[i], Scalar) else thenOps[i].data
312
+ result.data = np.where(
313
+ condition.data, value, result.data # type: ignore[call-overload]
314
+ )
315
+
316
+ condition_mask_else = ~np.any([condition.data for condition in conditions], axis=0)
317
+ else_value = elseOp.value if isinstance(elseOp, Scalar) else elseOp.data
318
+ result.data = pd.Series(
319
+ np.where(condition_mask_else, else_value, result.data),
320
+ index=conditions[0].data.index,
321
+ )
322
+
323
+ if isinstance(result, Dataset):
324
+ identifiers = result.get_identifiers_names()
325
+ columns = [col for col in result.get_components_names() if col not in identifiers]
326
+ result.data = (
327
+ conditions[0].data[identifiers]
328
+ if conditions[0].data is not None
329
+ else pd.DataFrame(columns=identifiers)
330
+ )
331
+
332
+ for i in range(len(conditions)):
333
+ condition = conditions[i]
334
+ bool_col = next(x.name for x in condition.get_measures() if x.data_type == Boolean)
335
+ condition_mask = condition.data[bool_col]
336
+
337
+ result.data.loc[condition_mask, columns] = (
338
+ thenOps[i].value
339
+ if isinstance(thenOps[i], Scalar)
340
+ else thenOps[i].data.loc[condition_mask, columns]
341
+ )
342
+
343
+ condition_mask_else = ~np.logical_or.reduce(
344
+ [
345
+ condition.data[
346
+ next(x.name for x in condition.get_measures() if x.data_type == Boolean)
347
+ ].astype(bool)
348
+ for condition in conditions
349
+ ]
350
+ )
351
+
352
+ result.data.loc[condition_mask_else, columns] = (
353
+ elseOp.value
354
+ if isinstance(elseOp, Scalar)
355
+ else elseOp.data.loc[condition_mask_else, columns]
356
+ )
357
+
358
+ return result
359
+
360
+ @classmethod
361
+ def validate(
362
+ cls, conditions: List[Any], thenOps: List[Any], elseOp: Any
363
+ ) -> Union[Scalar, DataComponent, Dataset]:
364
+
365
+ if len(set(map(type, conditions))) > 1:
366
+ raise SemanticError("2-1-9-1", op=cls.op)
367
+
368
+ ops = thenOps + [elseOp]
369
+ then_else_types = set(map(type, ops))
370
+ condition_type = type(conditions[0])
371
+
372
+ if condition_type is Scalar:
373
+ for condition in conditions:
374
+ if condition.data_type != Boolean:
375
+ raise SemanticError("2-1-9-2", op=cls.op, name=condition.name)
376
+ if list(then_else_types) != [Scalar]:
377
+ raise SemanticError("2-1-9-3", op=cls.op)
378
+
379
+ # The output data type is the data type of the last then operation that has a true
380
+ # condition, defaulting to the data type of the else operation if no condition is true
381
+ output_data_type = elseOp.data_type
382
+ for i in range(len(conditions)):
383
+ if conditions[i].value:
384
+ output_data_type = thenOps[i].data_type
385
+
386
+ return Scalar(
387
+ name="result",
388
+ value=None,
389
+ data_type=output_data_type,
390
+ )
391
+
392
+ elif condition_type is DataComponent:
393
+ for condition in conditions:
394
+ if not condition.data_type == Boolean:
395
+ raise SemanticError("2-1-9-4", op=cls.op, name=condition.name)
396
+
397
+ nullable = any(
398
+ thenOp.nullable if isinstance(thenOp, DataComponent) else thenOp.data_type == Null
399
+ for thenOp in ops
400
+ )
401
+
402
+ data_type = ops[0].data_type
403
+ for op in ops[1:]:
404
+ data_type = binary_implicit_promotion(data_type, op.data_type)
405
+
406
+ return DataComponent(
407
+ name="result",
408
+ data=None,
409
+ data_type=data_type,
410
+ role=Role.MEASURE,
411
+ nullable=nullable,
412
+ )
413
+
414
+ # Dataset
415
+ for condition in conditions:
416
+ if len(condition.get_measures_names()) != 1:
417
+ raise SemanticError("1-1-1-4", op=cls.op)
418
+ if condition.get_measures()[0].data_type != Boolean:
419
+ raise SemanticError("2-1-9-5", op=cls.op, name=condition.name)
420
+
421
+ if Dataset not in then_else_types:
422
+ raise SemanticError("2-1-9-6", op=cls.op)
423
+
424
+ components = next(op for op in ops if isinstance(op, Dataset)).components
425
+ comp_names = [comp.name for comp in components.values()]
426
+ for op in ops:
427
+ if isinstance(op, Dataset) and op.get_components_names() != comp_names:
428
+ raise SemanticError("2-1-9-7", op=cls.op)
429
+
430
+ return Dataset(name="result", components=components, data=None)
@@ -1,11 +1,11 @@
1
- from typing import Dict, List, Any, Union
1
+ import sqlite3
2
+ from typing import Any, Dict, List, Union
2
3
 
3
4
  import pandas as pd
4
- import sqlite3
5
5
 
6
6
  from vtlengine.DataTypes import COMP_NAME_MAPPING
7
7
  from vtlengine.Exceptions import SemanticError
8
- from vtlengine.Model import Dataset, ExternalRoutine, Role, Component, DataComponent
8
+ from vtlengine.Model import Component, DataComponent, Dataset, ExternalRoutine, Role
9
9
  from vtlengine.Operators import Binary, Unary
10
10
 
11
11
 
@@ -143,7 +143,7 @@ class Eval(Unary):
143
143
  df = cls._execute_query(
144
144
  external_routine.query, external_routine.dataset_names, empty_data_dict
145
145
  )
146
- component_names = [name for name in df.columns]
146
+ component_names = df.columns.tolist()
147
147
  for comp_name in component_names:
148
148
  if comp_name not in output.components:
149
149
  raise SemanticError(