vtlengine 1.0.2__py3-none-any.whl → 1.0.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vtlengine might be problematic. Click here for more details.

Files changed (46)
  1. vtlengine/API/_InternalApi.py +12 -5
  2. vtlengine/API/__init__.py +8 -8
  3. vtlengine/AST/ASTConstructor.py +23 -43
  4. vtlengine/AST/ASTConstructorModules/Expr.py +69 -84
  5. vtlengine/AST/ASTConstructorModules/ExprComponents.py +47 -57
  6. vtlengine/AST/ASTConstructorModules/Terminals.py +28 -39
  7. vtlengine/AST/ASTTemplate.py +0 -1
  8. vtlengine/AST/DAG/__init__.py +12 -15
  9. vtlengine/AST/Grammar/tokens.py +2 -2
  10. vtlengine/AST/VtlVisitor.py +0 -1
  11. vtlengine/AST/__init__.py +2 -3
  12. vtlengine/DataTypes/TimeHandling.py +10 -7
  13. vtlengine/DataTypes/__init__.py +17 -24
  14. vtlengine/Exceptions/__init__.py +3 -5
  15. vtlengine/Exceptions/messages.py +68 -56
  16. vtlengine/Interpreter/__init__.py +82 -103
  17. vtlengine/Model/__init__.py +10 -12
  18. vtlengine/Operators/Aggregation.py +14 -14
  19. vtlengine/Operators/Analytic.py +3 -10
  20. vtlengine/Operators/Assignment.py +2 -3
  21. vtlengine/Operators/Boolean.py +5 -7
  22. vtlengine/Operators/CastOperator.py +12 -13
  23. vtlengine/Operators/Clause.py +11 -13
  24. vtlengine/Operators/Comparison.py +31 -17
  25. vtlengine/Operators/Conditional.py +48 -49
  26. vtlengine/Operators/General.py +4 -4
  27. vtlengine/Operators/HROperators.py +41 -34
  28. vtlengine/Operators/Join.py +18 -22
  29. vtlengine/Operators/Numeric.py +44 -45
  30. vtlengine/Operators/RoleSetter.py +6 -8
  31. vtlengine/Operators/Set.py +7 -12
  32. vtlengine/Operators/String.py +19 -27
  33. vtlengine/Operators/Time.py +298 -109
  34. vtlengine/Operators/Validation.py +4 -7
  35. vtlengine/Operators/__init__.py +38 -41
  36. vtlengine/Utils/__init__.py +133 -114
  37. vtlengine/__init__.py +1 -1
  38. vtlengine/files/output/__init__.py +2 -2
  39. vtlengine/files/output/_time_period_representation.py +0 -1
  40. vtlengine/files/parser/__init__.py +16 -18
  41. vtlengine/files/parser/_time_checking.py +1 -2
  42. {vtlengine-1.0.2.dist-info → vtlengine-1.0.3rc1.dist-info}/METADATA +1 -3
  43. vtlengine-1.0.3rc1.dist-info/RECORD +58 -0
  44. vtlengine-1.0.2.dist-info/RECORD +0 -58
  45. {vtlengine-1.0.2.dist-info → vtlengine-1.0.3rc1.dist-info}/LICENSE.md +0 -0
  46. {vtlengine-1.0.2.dist-info → vtlengine-1.0.3rc1.dist-info}/WHEEL +0 -0
@@ -2,17 +2,18 @@ import json
2
2
  from collections import Counter
3
3
  from dataclasses import dataclass
4
4
  from enum import Enum
5
- from typing import Dict, List, Optional, Union, Any, Type
5
+ from typing import Any, Dict, List, Optional, Type, Union
6
6
 
7
- import vtlengine.DataTypes as DataTypes
8
7
  import pandas as pd
9
8
  import sqlglot
10
9
  import sqlglot.expressions as exp
11
- from vtlengine.DataTypes import SCALAR_TYPES, ScalarType
12
- from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
13
10
  from pandas import DataFrame as PandasDataFrame
14
11
  from pandas._testing import assert_frame_equal
15
12
 
13
+ import vtlengine.DataTypes as DataTypes
14
+ from vtlengine.DataTypes import SCALAR_TYPES, ScalarType
15
+ from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
16
+ from vtlengine.Exceptions import SemanticError
16
17
 
17
18
  # from pyspark.pandas import DataFrame as SparkDataFrame, Series as SparkSeries
18
19
 
@@ -159,7 +160,7 @@ class Dataset:
159
160
  raise ValueError(
160
161
  "The number of components must match the number of columns in the data"
161
162
  )
162
- for name, component in self.components.items():
163
+ for name, _ in self.components.items():
163
164
  if name not in self.data.columns:
164
165
  raise ValueError(f"Component {name} not found in the data")
165
166
 
@@ -209,8 +210,8 @@ class Dataset:
209
210
  return True
210
211
  elif self.data is None or other.data is None:
211
212
  return False
212
- if len(self.data) == len(other.data) == 0:
213
- assert self.data.shape == other.data.shape
213
+ if len(self.data) == len(other.data) == 0 and self.data.shape != other.data.shape:
214
+ raise SemanticError("0-1-1-14", dataset1=self.name, dataset2=other.name)
214
215
 
215
216
  self.data.fillna("", inplace=True)
216
217
  other.data.fillna("", inplace=True)
@@ -234,11 +235,8 @@ class Dataset:
234
235
  lambda x: str(TimePeriodHandler(x)) if x != "" else "", na_action="ignore"
235
236
  )
236
237
  elif type_name in ["Integer", "Number"]:
237
- if type_name == "Integer":
238
- type_ = "int64"
239
- else:
240
- type_ = "float32"
241
- # We use here a number to avoid errors on equality on empty strings
238
+ type_ = "int64" if type_name == "Integer" else "float32"
239
+ # We use here a number to avoid errors on equality on empty strings
242
240
  self.data[comp.name] = (
243
241
  self.data[comp.name]
244
242
  .replace("", -1234997)
@@ -1,19 +1,8 @@
1
1
  from copy import copy
2
- from typing import List, Optional, Any
2
+ from typing import Any, List, Optional
3
3
 
4
4
  import duckdb
5
5
  import pandas as pd
6
- from vtlengine.DataTypes import (
7
- Integer,
8
- Number,
9
- unary_implicit_promotion,
10
- Boolean,
11
- String,
12
- Duration,
13
- TimeInterval,
14
- TimePeriod,
15
- Date,
16
- )
17
6
 
18
7
  import vtlengine.Operators as Operator
19
8
  from vtlengine.AST.Grammar.tokens import (
@@ -28,11 +17,22 @@ from vtlengine.AST.Grammar.tokens import (
28
17
  VAR_POP,
29
18
  VAR_SAMP,
30
19
  )
20
+ from vtlengine.DataTypes import (
21
+ Boolean,
22
+ Date,
23
+ Duration,
24
+ Integer,
25
+ Number,
26
+ String,
27
+ TimeInterval,
28
+ TimePeriod,
29
+ unary_implicit_promotion,
30
+ )
31
31
  from vtlengine.DataTypes.TimeHandling import (
32
32
  DURATION_MAPPING,
33
33
  DURATION_MAPPING_REVERSED,
34
- TimePeriodHandler,
35
34
  TimeIntervalHandler,
35
+ TimePeriodHandler,
36
36
  )
37
37
  from vtlengine.Exceptions import SemanticError
38
38
  from vtlengine.Model import Component, Dataset, Role
@@ -153,7 +153,7 @@ class Aggregation(Operator.Unary):
153
153
  if comp.role == Role.ATTRIBUTE:
154
154
  del result_components[comp_name]
155
155
  # Change Measure data type
156
- for comp_name, comp in result_components.items():
156
+ for _, comp in result_components.items():
157
157
  if comp.role == Role.MEASURE:
158
158
  unary_implicit_promotion(comp.data_type, cls.type_to_check)
159
159
  if cls.return_type is not None:
@@ -3,8 +3,6 @@ from typing import List, Optional
3
3
 
4
4
  import duckdb
5
5
 
6
- from vtlengine.Exceptions import SemanticError
7
-
8
6
  # if os.environ.get("SPARK"):
9
7
  # import pyspark.pandas as pd
10
8
  # else:
@@ -32,6 +30,7 @@ from vtlengine.AST.Grammar.tokens import (
32
30
  VAR_SAMP,
33
31
  )
34
32
  from vtlengine.DataTypes import COMP_NAME_MAPPING, Integer, Number, unary_implicit_promotion
33
+ from vtlengine.Exceptions import SemanticError
35
34
  from vtlengine.Model import Component, Dataset, Role
36
35
 
37
36
 
@@ -60,10 +59,7 @@ class Analytic(Operator.Unary):
60
59
  params: Optional[List[int]],
61
60
  component_name: Optional[str] = None,
62
61
  ) -> Dataset:
63
- if ordering is None:
64
- order_components = []
65
- else:
66
- order_components = [o.component for o in ordering]
62
+ order_components = [] if ordering is None else [o.component for o in ordering]
67
63
  identifier_names = operand.get_identifiers_names()
68
64
  result_components = operand.components.copy()
69
65
 
@@ -178,10 +174,7 @@ class Analytic(Operator.Unary):
178
174
  window_str = f"{mode} BETWEEN {window.start} {start_mode} AND {window.stop} {stop_mode}"
179
175
 
180
176
  # Partitioning
181
- if len(partitioning) > 0:
182
- partition = "PARTITION BY " + ", ".join(partitioning)
183
- else:
184
- partition = ""
177
+ partition = "PARTITION BY " + ", ".join(partitioning) if len(partitioning) > 0 else ""
185
178
 
186
179
  # Ordering
187
180
  order_str = ""
@@ -1,9 +1,8 @@
1
- from typing import Union, Any
2
-
3
- from vtlengine.Operators import Binary
1
+ from typing import Any, Union
4
2
 
5
3
  from vtlengine.Exceptions import SemanticError
6
4
  from vtlengine.Model import DataComponent, Dataset
5
+ from vtlengine.Operators import Binary
7
6
 
8
7
  ALL_MODEL_TYPES = Union[DataComponent, Dataset]
9
8
 
@@ -2,13 +2,13 @@
2
2
  # import pyspark.pandas as pd
3
3
  # else:
4
4
  # import pandas as pd
5
- import pandas as pd
5
+ from typing import Any, Optional
6
6
 
7
- from typing import Optional, Any
7
+ import pandas as pd
8
8
 
9
- from vtlengine.AST.Grammar.tokens import AND, OR, XOR, NOT
10
- from vtlengine.DataTypes import Boolean
11
9
  import vtlengine.Operators as Operator
10
+ from vtlengine.AST.Grammar.tokens import AND, NOT, OR, XOR
11
+ from vtlengine.DataTypes import Boolean
12
12
 
13
13
 
14
14
  class Unary(Operator.Unary):
@@ -30,9 +30,7 @@ class Binary(Operator.Binary):
30
30
 
31
31
  @classmethod
32
32
  def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
33
- result = cls.comp_op(
34
- left_series.astype("boolean"), right_series.astype("boolean")
35
- )
33
+ result = cls.comp_op(left_series.astype("boolean"), right_series.astype("boolean"))
36
34
  return result.replace({pd.NA: None}).astype(object)
37
35
 
38
36
  @classmethod
@@ -1,27 +1,27 @@
1
1
  from copy import copy
2
- from typing import Optional, Any, Union, Type
2
+ from typing import Any, Optional, Type, Union
3
3
 
4
- import vtlengine.Operators as Operator
5
4
  import pandas as pd
5
+
6
+ import vtlengine.Operators as Operator
7
+ from vtlengine.AST.Grammar.tokens import CAST
6
8
  from vtlengine.DataTypes import (
7
9
  COMP_NAME_MAPPING,
8
10
  EXPLICIT_WITH_MASK_TYPE_PROMOTION_MAPPING,
9
11
  EXPLICIT_WITHOUT_MASK_TYPE_PROMOTION_MAPPING,
10
12
  IMPLICIT_TYPE_PROMOTION_MAPPING,
11
- String,
12
- Number,
13
- TimeInterval,
13
+ SCALAR_TYPES_CLASS_REVERSE,
14
14
  Date,
15
- TimePeriod,
16
15
  Duration,
17
- SCALAR_TYPES_CLASS_REVERSE,
16
+ Number,
18
17
  ScalarType,
18
+ String,
19
+ TimeInterval,
20
+ TimePeriod,
19
21
  )
20
22
  from vtlengine.DataTypes.TimeHandling import str_period_to_date
21
-
22
- from vtlengine.AST.Grammar.tokens import CAST
23
23
  from vtlengine.Exceptions import SemanticError
24
- from vtlengine.Model import Component, Dataset, Role, Scalar, DataComponent
24
+ from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
25
25
 
26
26
  duration_mapping = {"A": 6, "S": 5, "Q": 4, "M": 3, "W": 2, "D": 1}
27
27
 
@@ -286,9 +286,8 @@ class Cast(Operator.Unary):
286
286
  mask: Optional[str] = None,
287
287
  ) -> Any:
288
288
 
289
- if mask is not None:
290
- if not isinstance(mask, str):
291
- raise Exception(f"{cls.op} mask must be a string")
289
+ if mask is not None and not isinstance(mask, str):
290
+ raise Exception(f"{cls.op} mask must be a string")
292
291
 
293
292
  if isinstance(operand, Dataset):
294
293
  return cls.dataset_validation(operand, scalarType, mask)
@@ -1,21 +1,20 @@
1
- import pandas as pd
2
-
3
1
  from copy import copy
4
- from typing import List, Union, Type
2
+ from typing import List, Type, Union
3
+
4
+ import pandas as pd
5
5
 
6
+ from vtlengine.AST import RenameNode
7
+ from vtlengine.AST.Grammar.tokens import AGGREGATE, CALC, DROP, KEEP, RENAME, SUBSPACE
6
8
  from vtlengine.DataTypes import (
7
9
  Boolean,
10
+ ScalarType,
8
11
  String,
9
12
  check_unary_implicit_promotion,
10
13
  unary_implicit_promotion,
11
- ScalarType,
12
14
  )
13
- from vtlengine.Operators import Operator
14
-
15
- from vtlengine.AST import RenameNode
16
- from vtlengine.AST.Grammar.tokens import KEEP, DROP, RENAME, SUBSPACE, CALC, AGGREGATE
17
15
  from vtlengine.Exceptions import SemanticError
18
16
  from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
17
+ from vtlengine.Operators import Operator
19
18
 
20
19
 
21
20
  class Calc(Operator):
@@ -162,9 +161,8 @@ class Keep(Operator):
162
161
  def evaluate(cls, operands: List[str], dataset: Dataset) -> Dataset:
163
162
  if len(operands) == 0:
164
163
  raise ValueError("Keep clause requires at least one operand")
165
- if dataset is None:
166
- if sum(isinstance(operand, Dataset) for operand in operands) != 1:
167
- raise ValueError("Keep clause requires at most one dataset operand")
164
+ if dataset is None and sum(isinstance(operand, Dataset) for operand in operands) != 1:
165
+ raise ValueError("Keep clause requires at most one dataset operand")
168
166
  result_dataset = cls.validate(operands, dataset)
169
167
  if dataset.data is not None:
170
168
  result_dataset.data = dataset.data[dataset.get_identifiers_names() + operands]
@@ -212,11 +210,11 @@ class Rename(Operator):
212
210
  raise SemanticError("1-3-1", alias=duplicates)
213
211
 
214
212
  for operand in operands:
215
- if operand.old_name not in dataset.components.keys():
213
+ if operand.old_name not in dataset.components:
216
214
  raise SemanticError(
217
215
  "1-1-1-10", op=cls.op, comp_name=operand.old_name, dataset_name=dataset.name
218
216
  )
219
- if operand.new_name in dataset.components.keys():
217
+ if operand.new_name in dataset.components:
220
218
  raise SemanticError(
221
219
  "1-1-6-8", op=cls.op, comp_name=operand.new_name, dataset_name=dataset.name
222
220
  )
@@ -3,15 +3,13 @@ import re
3
3
  from copy import copy
4
4
  from typing import Any, Optional, Union
5
5
 
6
- from vtlengine.Exceptions import SemanticError
7
- from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
8
-
9
6
  # if os.environ.get("SPARK"):
10
7
  # import pyspark.pandas as pd
11
8
  # else:
12
9
  # import pandas as pd
13
10
  import pandas as pd
14
11
 
12
+ import vtlengine.Operators as Operator
15
13
  from vtlengine.AST.Grammar.tokens import (
16
14
  CHARSET_MATCH,
17
15
  EQ,
@@ -24,8 +22,9 @@ from vtlengine.AST.Grammar.tokens import (
24
22
  NEQ,
25
23
  NOT_IN,
26
24
  )
27
- from vtlengine.DataTypes import Boolean, COMP_NAME_MAPPING, String, Number, Null
28
- import vtlengine.Operators as Operator
25
+ from vtlengine.DataTypes import COMP_NAME_MAPPING, Boolean, Null, Number, String
26
+ from vtlengine.Exceptions import SemanticError
27
+ from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
29
28
 
30
29
 
31
30
  class Unary(Operator.Unary):
@@ -75,11 +74,11 @@ class Binary(Operator.Binary):
75
74
  return_type = Boolean
76
75
 
77
76
  @classmethod
78
- def _cast_values(
79
- cls, x: Optional[Union[int, float, str, bool]], y: Optional[Union[int, float, str, bool]]
80
- ) -> Any:
81
- # Cast both values to the same data type
82
- # An integer can be considered a bool, we must check first boolean, then numbers
77
+ def _cast_values(cls,
78
+ x: Optional[Union[int, float, str, bool]],
79
+ y: Optional[Union[int, float, str, bool]]
80
+ ) -> Any:
81
+ # Cast values to compatible types for comparison
83
82
  try:
84
83
  if isinstance(x, str) and isinstance(y, bool):
85
84
  y = String.cast(y)
@@ -97,6 +96,7 @@ class Binary(Operator.Binary):
97
96
 
98
97
  @classmethod
99
98
  def op_func(cls, x: Any, y: Any) -> Any:
99
+ # Return None if any of the values are NaN
100
100
  if pd.isnull(x) or pd.isnull(y):
101
101
  return None
102
102
  x, y = cls._cast_values(x, y)
@@ -104,12 +104,29 @@ class Binary(Operator.Binary):
104
104
 
105
105
  @classmethod
106
106
  def apply_operation_series_scalar(cls, series: Any, scalar: Any, series_left: bool) -> Any:
107
- if scalar is None:
107
+ if pd.isnull(scalar):
108
108
  return pd.Series(None, index=series.index)
109
+
110
+ first_non_null = series.dropna().iloc[0] if not series.dropna().empty else None
111
+ if first_non_null is not None:
112
+ scalar, first_non_null = cls._cast_values(scalar, first_non_null)
113
+
114
+ series_type = pd.api.types.infer_dtype(series, skipna=True)
115
+ first_non_null_type = pd.api.types.infer_dtype([first_non_null])
116
+
117
+ if series_type != first_non_null_type:
118
+ if isinstance(first_non_null, str):
119
+ series = series.astype(str)
120
+ elif isinstance(first_non_null, (int, float)):
121
+ series = series.astype(float)
122
+
123
+ op = cls.py_op if cls.py_op is not None else cls.op_func
109
124
  if series_left:
110
- return series.map(lambda x: cls.op_func(x, scalar), na_action="ignore")
125
+ result = series.map(lambda x: op(x, scalar), na_action="ignore")
111
126
  else:
112
- return series.map(lambda x: cls.op_func(scalar, x), na_action="ignore")
127
+ result = series.map(lambda x: op(scalar, x), na_action="ignore")
128
+
129
+ return result
113
130
 
114
131
  @classmethod
115
132
  def apply_return_type_dataset(
@@ -408,10 +425,7 @@ class ExistIn(Operator.Operator):
408
425
  reference_identifiers_names = left_id_names
409
426
 
410
427
  # Checking if the left dataset is a subset of the right dataset
411
- if is_subset_left:
412
- common_columns = left_id_names
413
- else:
414
- common_columns = right_id_names
428
+ common_columns = left_id_names if is_subset_left else right_id_names
415
429
 
416
430
  # Check if the common identifiers are equal between the two datasets
417
431
  if dataset_1.data is not None and dataset_2.data is not None:
@@ -1,31 +1,30 @@
1
1
  from copy import copy
2
- from typing import Union, Any, List
2
+ from typing import Any, List, Union
3
3
 
4
4
  import numpy as np
5
5
 
6
+ # if os.environ.get("SPARK", False):
7
+ # import pyspark.pandas as pd
8
+ # else:
9
+ # import pandas as pd
10
+ import pandas as pd
11
+
6
12
  from vtlengine.DataTypes import (
7
- Boolean,
8
13
  COMP_NAME_MAPPING,
9
- binary_implicit_promotion,
10
14
  SCALAR_TYPES_CLASS_REVERSE,
15
+ Boolean,
11
16
  Null,
17
+ binary_implicit_promotion,
12
18
  )
13
- from vtlengine.Operators import Operator, Binary
14
-
15
19
  from vtlengine.Exceptions import SemanticError
16
- from vtlengine.Model import Scalar, DataComponent, Dataset, Role
17
-
18
- # if os.environ.get("SPARK", False):
19
- # import pyspark.pandas as pd
20
- # else:
21
- # import pandas as pd
22
- import pandas as pd
20
+ from vtlengine.Model import DataComponent, Dataset, Role, Scalar
21
+ from vtlengine.Operators import Binary, Operator
23
22
 
24
23
 
25
24
  class If(Operator):
26
25
  """
27
26
  If class:
28
- `If-then-else <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=225&zoom=100,72,142>`_ operator # noqa E501
27
+ `If-then-else <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=225&zoom=100,72,142>`_ operator
29
28
  inherits from Operator, a superclass that contains general validate and evaluate class methods.
30
29
  It has the following class methods:
31
30
  Class methods:
@@ -40,7 +39,7 @@ class If(Operator):
40
39
  validate: Class method that has two branches so datacomponent and datasets can be validated. With datacomponent,
41
40
  the code reviews if it is actually a Measure and if it is a binary operation. Dataset branch reviews if the
42
41
  identifiers are the same in 'if', 'then' and 'else'.
43
- """
42
+ """ # noqa E501
44
43
 
45
44
  @classmethod
46
45
  def evaluate(cls, condition: Any, true_branch: Any, false_branch: Any) -> Any:
@@ -108,7 +107,7 @@ class If(Operator):
108
107
  )
109
108
  if isinstance(result, Dataset):
110
109
  drop_columns = [
111
- column for column in result.data.columns if column not in result.components.keys()
110
+ column for column in result.data.columns if column not in result.components
112
111
  ]
113
112
  result.data = result.data.dropna(subset=drop_columns).drop(columns=drop_columns)
114
113
  if isinstance(true_branch, Scalar) and isinstance(false_branch, Scalar):
@@ -213,14 +212,14 @@ class If(Operator):
213
212
  class Nvl(Binary):
214
213
  """
215
214
  Null class:
216
- `Nvl <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=229&zoom=100,72,370>`_operator class. # noqa E501
215
+ `Nvl <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=229&zoom=100,72,370>`_operator class.
217
216
  It has the following class methods:
218
217
 
219
218
  Class methods:
220
219
  Validate: Class method that validates if the operation at scalar,
221
220
  datacomponent or dataset level can be performed.
222
221
  Evaluate: Evaluates the actual operation, returning the result.
223
- """
222
+ """ # noqa E501
224
223
 
225
224
  @classmethod
226
225
  def evaluate(cls, left: Any, right: Any) -> Union[Scalar, DataComponent, Dataset]:
@@ -287,21 +286,15 @@ class Nvl(Binary):
287
286
  }
288
287
  for comp in result_components.values():
289
288
  comp.nullable = False
290
- return Dataset(
291
- name="result",
292
- components=result_components,
293
- data=None
294
- )
289
+ return Dataset(name="result", components=result_components, data=None)
295
290
 
296
291
 
297
292
  class Case(Operator):
298
293
 
299
294
  @classmethod
300
- def evaluate(cls,
301
- conditions: List[Any],
302
- thenOps: List[Any],
303
- elseOp: Any
304
- ) -> Union[Scalar, DataComponent, Dataset]:
295
+ def evaluate(
296
+ cls, conditions: List[Any], thenOps: List[Any], elseOp: Any
297
+ ) -> Union[Scalar, DataComponent, Dataset]:
305
298
 
306
299
  result = cls.validate(conditions, thenOps, elseOp)
307
300
 
@@ -316,19 +309,25 @@ class Case(Operator):
316
309
 
317
310
  for i, condition in enumerate(conditions):
318
311
  value = thenOps[i].value if isinstance(thenOps[i], Scalar) else thenOps[i].data
319
- result.data = np.where(condition.data, value, # type: ignore[call-overload]
320
- result.data)
312
+ result.data = np.where(
313
+ condition.data, value, result.data # type: ignore[call-overload]
314
+ )
321
315
 
322
316
  condition_mask_else = ~np.any([condition.data for condition in conditions], axis=0)
323
317
  else_value = elseOp.value if isinstance(elseOp, Scalar) else elseOp.data
324
- result.data = pd.Series(np.where(condition_mask_else, else_value, result.data),
325
- index=conditions[0].data.index)
318
+ result.data = pd.Series(
319
+ np.where(condition_mask_else, else_value, result.data),
320
+ index=conditions[0].data.index,
321
+ )
326
322
 
327
323
  if isinstance(result, Dataset):
328
324
  identifiers = result.get_identifiers_names()
329
325
  columns = [col for col in result.get_components_names() if col not in identifiers]
330
- result.data = (conditions[0].data[identifiers] if conditions[0].data is not None
331
- else pd.DataFrame(columns=identifiers))
326
+ result.data = (
327
+ conditions[0].data[identifiers]
328
+ if conditions[0].data is not None
329
+ else pd.DataFrame(columns=identifiers)
330
+ )
332
331
 
333
332
  for i in range(len(conditions)):
334
333
  condition = conditions[i]
@@ -336,28 +335,32 @@ class Case(Operator):
336
335
  condition_mask = condition.data[bool_col]
337
336
 
338
337
  result.data.loc[condition_mask, columns] = (
339
- thenOps[i].value if isinstance(thenOps[i], Scalar)
338
+ thenOps[i].value
339
+ if isinstance(thenOps[i], Scalar)
340
340
  else thenOps[i].data.loc[condition_mask, columns]
341
341
  )
342
342
 
343
- condition_mask_else = ~np.logical_or.reduce([
344
- condition.data[next(x.name for x in condition.get_measures() if
345
- x.data_type == Boolean)].astype(bool) for
346
- condition in conditions])
343
+ condition_mask_else = ~np.logical_or.reduce(
344
+ [
345
+ condition.data[
346
+ next(x.name for x in condition.get_measures() if x.data_type == Boolean)
347
+ ].astype(bool)
348
+ for condition in conditions
349
+ ]
350
+ )
347
351
 
348
352
  result.data.loc[condition_mask_else, columns] = (
349
- elseOp.value if isinstance(elseOp, Scalar)
353
+ elseOp.value
354
+ if isinstance(elseOp, Scalar)
350
355
  else elseOp.data.loc[condition_mask_else, columns]
351
356
  )
352
357
 
353
358
  return result
354
359
 
355
360
  @classmethod
356
- def validate(cls,
357
- conditions: List[Any],
358
- thenOps: List[Any],
359
- elseOp: Any
360
- ) -> Union[Scalar, DataComponent, Dataset]:
361
+ def validate(
362
+ cls, conditions: List[Any], thenOps: List[Any], elseOp: Any
363
+ ) -> Union[Scalar, DataComponent, Dataset]:
361
364
 
362
365
  if len(set(map(type, conditions))) > 1:
363
366
  raise SemanticError("2-1-9-1", op=cls.op)
@@ -424,8 +427,4 @@ class Case(Operator):
424
427
  if isinstance(op, Dataset) and op.get_components_names() != comp_names:
425
428
  raise SemanticError("2-1-9-7", op=cls.op)
426
429
 
427
- return Dataset(
428
- name="result",
429
- components=components,
430
- data=None
431
- )
430
+ return Dataset(name="result", components=components, data=None)
@@ -1,11 +1,11 @@
1
- from typing import Dict, List, Any, Union
1
+ import sqlite3
2
+ from typing import Any, Dict, List, Union
2
3
 
3
4
  import pandas as pd
4
- import sqlite3
5
5
 
6
6
  from vtlengine.DataTypes import COMP_NAME_MAPPING
7
7
  from vtlengine.Exceptions import SemanticError
8
- from vtlengine.Model import Dataset, ExternalRoutine, Role, Component, DataComponent
8
+ from vtlengine.Model import Component, DataComponent, Dataset, ExternalRoutine, Role
9
9
  from vtlengine.Operators import Binary, Unary
10
10
 
11
11
 
@@ -143,7 +143,7 @@ class Eval(Unary):
143
143
  df = cls._execute_query(
144
144
  external_routine.query, external_routine.dataset_names, empty_data_dict
145
145
  )
146
- component_names = [name for name in df.columns]
146
+ component_names = df.columns.tolist()
147
147
  for comp_name in component_names:
148
148
  if comp_name not in output.components:
149
149
  raise SemanticError(