vtlengine 1.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vtlengine might be problematic. Click here for more details.

Files changed (54)
  1. vtlengine/API/_InternalApi.py +153 -100
  2. vtlengine/API/__init__.py +109 -67
  3. vtlengine/AST/ASTConstructor.py +188 -98
  4. vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
  5. vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
  6. vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
  7. vtlengine/AST/ASTEncoders.py +1 -1
  8. vtlengine/AST/ASTTemplate.py +8 -9
  9. vtlengine/AST/ASTVisitor.py +8 -12
  10. vtlengine/AST/DAG/__init__.py +43 -35
  11. vtlengine/AST/DAG/_words.py +4 -4
  12. vtlengine/AST/Grammar/lexer.py +732 -142
  13. vtlengine/AST/Grammar/parser.py +2188 -826
  14. vtlengine/AST/Grammar/tokens.py +128 -128
  15. vtlengine/AST/VtlVisitor.py +7 -4
  16. vtlengine/AST/__init__.py +22 -11
  17. vtlengine/DataTypes/NumericTypesHandling.py +5 -4
  18. vtlengine/DataTypes/TimeHandling.py +194 -301
  19. vtlengine/DataTypes/__init__.py +304 -218
  20. vtlengine/Exceptions/__init__.py +52 -27
  21. vtlengine/Exceptions/messages.py +134 -62
  22. vtlengine/Interpreter/__init__.py +781 -487
  23. vtlengine/Model/__init__.py +165 -121
  24. vtlengine/Operators/Aggregation.py +156 -95
  25. vtlengine/Operators/Analytic.py +115 -59
  26. vtlengine/Operators/Assignment.py +7 -4
  27. vtlengine/Operators/Boolean.py +27 -32
  28. vtlengine/Operators/CastOperator.py +177 -131
  29. vtlengine/Operators/Clause.py +137 -99
  30. vtlengine/Operators/Comparison.py +148 -117
  31. vtlengine/Operators/Conditional.py +149 -98
  32. vtlengine/Operators/General.py +68 -47
  33. vtlengine/Operators/HROperators.py +91 -72
  34. vtlengine/Operators/Join.py +217 -118
  35. vtlengine/Operators/Numeric.py +89 -44
  36. vtlengine/Operators/RoleSetter.py +16 -15
  37. vtlengine/Operators/Set.py +61 -36
  38. vtlengine/Operators/String.py +213 -139
  39. vtlengine/Operators/Time.py +334 -216
  40. vtlengine/Operators/Validation.py +117 -76
  41. vtlengine/Operators/__init__.py +340 -213
  42. vtlengine/Utils/__init__.py +195 -40
  43. vtlengine/__init__.py +1 -1
  44. vtlengine/files/output/__init__.py +15 -6
  45. vtlengine/files/output/_time_period_representation.py +10 -9
  46. vtlengine/files/parser/__init__.py +77 -52
  47. vtlengine/files/parser/_rfc_dialect.py +6 -5
  48. vtlengine/files/parser/_time_checking.py +46 -37
  49. vtlengine-1.0.1.dist-info/METADATA +236 -0
  50. vtlengine-1.0.1.dist-info/RECORD +58 -0
  51. {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
  52. vtlengine-1.0.dist-info/METADATA +0 -104
  53. vtlengine-1.0.dist-info/RECORD +0 -58
  54. {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
@@ -1,5 +1,4 @@
1
1
  import operator
2
- import os
3
2
  import re
4
3
  from copy import copy
5
4
  from typing import Any, Optional, Union
@@ -7,33 +6,47 @@ from typing import Any, Optional, Union
7
6
  from vtlengine.Exceptions import SemanticError
8
7
  from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
9
8
 
10
- if os.environ.get("SPARK"):
11
- import pyspark.pandas as pd
12
- else:
13
- import pandas as pd
14
-
15
- from vtlengine.AST.Grammar.tokens import CHARSET_MATCH, EQ, GT, GTE, IN, ISNULL, LT, LTE, NEQ, \
16
- NOT_IN
9
+ # if os.environ.get("SPARK"):
10
+ # import pyspark.pandas as pd
11
+ # else:
12
+ # import pandas as pd
13
+ import pandas as pd
14
+
15
+ from vtlengine.AST.Grammar.tokens import (
16
+ CHARSET_MATCH,
17
+ EQ,
18
+ GT,
19
+ GTE,
20
+ IN,
21
+ ISNULL,
22
+ LT,
23
+ LTE,
24
+ NEQ,
25
+ NOT_IN,
26
+ )
17
27
  from vtlengine.DataTypes import Boolean, COMP_NAME_MAPPING, String, Number, Null
18
28
  import vtlengine.Operators as Operator
19
29
 
30
+
20
31
  class Unary(Operator.Unary):
21
32
  """
22
33
  Unary comparison operator. It returns a boolean.
23
34
  """
35
+
24
36
  return_type = Boolean
25
37
 
26
38
 
27
39
  class IsNull(Unary):
28
40
  """
29
- Class that allows to perform the isnull comparison operator. It has different class methods to allow performing
30
- the operation with different datatypes.
41
+ Class that allows to perform the isnull comparison operator.
42
+ It has different class methods to allow performing the operation with different datatypes.
31
43
  """
44
+
32
45
  op = ISNULL
33
46
  py_op = pd.isnull
34
47
 
35
48
  @classmethod
36
- def apply_operation_component(cls, series: pd.Series) -> Any:
49
+ def apply_operation_component(cls, series: Any) -> Any:
37
50
  return series.isnull()
38
51
 
39
52
  @classmethod
@@ -41,14 +54,14 @@ class IsNull(Unary):
41
54
  return pd.isnull(x)
42
55
 
43
56
  @classmethod
44
- def dataset_validation(cls, operand: Dataset):
57
+ def dataset_validation(cls, operand: Dataset) -> Dataset:
45
58
  result = super().dataset_validation(operand)
46
59
  for measure in result.get_measures():
47
60
  measure.nullable = False
48
61
  return result
49
62
 
50
63
  @classmethod
51
- def component_validation(cls, operand: DataComponent):
64
+ def component_validation(cls, operand: DataComponent) -> DataComponent:
52
65
  result = super().component_validation(operand)
53
66
  result.nullable = False
54
67
  return result
@@ -58,11 +71,13 @@ class Binary(Operator.Binary):
58
71
  """
59
72
  Binary comparison operator. It returns a boolean.
60
73
  """
74
+
61
75
  return_type = Boolean
62
76
 
63
77
  @classmethod
64
- def _cast_values(cls, x: Union[int, float, str, bool],
65
- y: Union[int, float, str, bool]) -> tuple:
78
+ def _cast_values(
79
+ cls, x: Optional[Union[int, float, str, bool]], y: Optional[Union[int, float, str, bool]]
80
+ ) -> Any:
66
81
  # Cast both values to the same data type
67
82
  # An integer can be considered a bool, we must check first boolean, then numbers
68
83
  try:
@@ -88,19 +103,20 @@ class Binary(Operator.Binary):
88
103
  return cls.py_op(x, y)
89
104
 
90
105
  @classmethod
91
- def apply_operation_series_scalar(cls, series: pd.Series, scalar: Any,
92
- series_left: bool) -> Any:
106
+ def apply_operation_series_scalar(cls, series: Any, scalar: Any, series_left: bool) -> Any:
93
107
  if scalar is None:
94
108
  return pd.Series(None, index=series.index)
95
109
  if series_left:
96
- return series.map(lambda x: cls.op_func(x, scalar), na_action='ignore')
110
+ return series.map(lambda x: cls.op_func(x, scalar), na_action="ignore")
97
111
  else:
98
- return series.map(lambda x: cls.op_func(scalar, x), na_action='ignore')
112
+ return series.map(lambda x: cls.op_func(scalar, x), na_action="ignore")
99
113
 
100
114
  @classmethod
101
115
  def apply_return_type_dataset(
102
- cls, result_dataset: Dataset, left_operand: Dataset,
103
- right_operand: Union[Dataset, Scalar, ScalarSet]
116
+ cls,
117
+ result_dataset: Dataset,
118
+ left_operand: Dataset,
119
+ right_operand: Union[Dataset, Scalar, ScalarSet],
104
120
  ) -> None:
105
121
  super().apply_return_type_dataset(result_dataset, left_operand, right_operand)
106
122
  is_mono_measure = len(result_dataset.get_measures()) == 1
@@ -110,7 +126,7 @@ class Binary(Operator.Binary):
110
126
  name=COMP_NAME_MAPPING[Boolean],
111
127
  data_type=Boolean,
112
128
  role=Role.MEASURE,
113
- nullable=measure.nullable
129
+ nullable=measure.nullable,
114
130
  )
115
131
  result_dataset.delete_component(measure.name)
116
132
  result_dataset.add_component(component)
@@ -152,16 +168,14 @@ class In(Binary):
152
168
  op = IN
153
169
 
154
170
  @classmethod
155
- def apply_operation_two_series(cls,
156
- left_series: Any,
157
- right_series: ScalarSet) -> Any:
171
+ def apply_operation_two_series(cls, left_series: Any, right_series: ScalarSet) -> Any:
158
172
  if right_series.data_type == Null:
159
173
  return pd.Series(None, index=left_series.index)
160
174
 
161
- return left_series.map(lambda x: x in right_series, na_action='ignore')
175
+ return left_series.map(lambda x: x in right_series, na_action="ignore")
162
176
 
163
177
  @classmethod
164
- def py_op(cls, x, y):
178
+ def py_op(cls, x: Any, y: Any) -> Any:
165
179
  if y.data_type == Null:
166
180
  return None
167
181
  return operator.contains(y, x)
@@ -171,14 +185,12 @@ class NotIn(Binary):
171
185
  op = NOT_IN
172
186
 
173
187
  @classmethod
174
- def apply_operation_two_series(cls,
175
- left_series: Any,
176
- right_series: list) -> Any:
188
+ def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
177
189
  series_result = In.apply_operation_two_series(left_series, right_series)
178
- return series_result.map(lambda x: not x, na_action='ignore')
190
+ return series_result.map(lambda x: not x, na_action="ignore")
179
191
 
180
192
  @classmethod
181
- def py_op(cls, x, y):
193
+ def py_op(cls, x: Any, y: Any) -> Any:
182
194
  return not operator.contains(y, x)
183
195
 
184
196
 
@@ -187,7 +199,7 @@ class Match(Binary):
187
199
  type_to_check = String
188
200
 
189
201
  @classmethod
190
- def op_func(cls, x, y):
202
+ def op_func(cls, x: Optional[str], y: Optional[str]) -> Optional[bool]:
191
203
  if pd.isnull(x) or pd.isnull(y):
192
204
  return None
193
205
  if isinstance(x, pd.Series):
@@ -199,41 +211,44 @@ class Between(Operator.Operator):
199
211
  return_type = Boolean
200
212
  """
201
213
  This comparison operator has the following class methods.
202
-
214
+
203
215
  Class methods:
204
216
  op_function: Sets the data to be manipulated.
205
-
206
- apply_operation_component: Returns a pandas dataframe with the operation, considering each component with the
207
- schema of op_function.
208
-
209
- apply_return_type_dataset: Because the result must be a boolean, this function evaluates if the measure
210
- is actually a boolean one.
211
-
212
-
217
+ apply_operation_component: Returns a pandas dataframe with the operation,
218
+
219
+ considering each component with the schema of op_function.
220
+
221
+ apply_return_type_dataset: Because the result must be a boolean,
222
+ this function evaluates if the measure is actually a boolean one.
213
223
  """
214
224
 
215
225
  @classmethod
216
- def op_func(cls,
217
- x: Optional[Union[int, float, bool, str]],
218
- y: Optional[Union[int, float, bool, str]],
219
- z: Optional[Union[int, float, bool, str]]):
220
- return None if pd.isnull(x) or pd.isnull(y) or pd.isnull(z) else y <= x <= z
226
+ def op_func(
227
+ cls,
228
+ x: Optional[Union[int, float, bool, str]],
229
+ y: Optional[Union[int, float, bool, str]],
230
+ z: Optional[Union[int, float, bool, str]],
231
+ ) -> Optional[bool]:
232
+ return (
233
+ None
234
+ if (pd.isnull(x) or pd.isnull(y) or pd.isnull(z))
235
+ else y <= x <= z # type: ignore[operator]
236
+ )
221
237
 
222
238
  @classmethod
223
- def apply_operation_component(cls, series: pd.Series,
224
- from_data: Optional[Union[pd.Series, int, float, bool, str]],
225
- to_data: Optional[
226
- Union[pd.Series, int, float, bool, str]]) -> Any:
227
- control_any_series_from_to = isinstance(from_data, pd.Series) or isinstance(to_data,
228
- pd.Series)
239
+ def apply_operation_component(cls, series: Any, from_data: Any, to_data: Any) -> Any:
240
+ control_any_series_from_to = isinstance(from_data, pd.Series) or isinstance(
241
+ to_data, pd.Series
242
+ )
229
243
  if control_any_series_from_to:
230
244
  if not isinstance(from_data, pd.Series):
231
245
  from_data = pd.Series(from_data, index=series.index, dtype=object)
232
246
  if not isinstance(to_data, pd.Series):
233
247
  to_data = pd.Series(to_data, index=series.index)
234
- df = pd.DataFrame({'operand': series, 'from_data': from_data, 'to_data': to_data})
235
- return df.apply(lambda x: cls.op_func(x['operand'], x['from_data'], x['to_data']),
236
- axis=1)
248
+ df = pd.DataFrame({"operand": series, "from_data": from_data, "to_data": to_data})
249
+ return df.apply(
250
+ lambda x: cls.op_func(x["operand"], x["from_data"], x["to_data"]), axis=1
251
+ )
237
252
 
238
253
  return series.map(lambda x: cls.op_func(x, from_data, to_data))
239
254
 
@@ -242,14 +257,13 @@ class Between(Operator.Operator):
242
257
  is_mono_measure = len(operand.get_measures()) == 1
243
258
  for measure in result_dataset.get_measures():
244
259
  operand_type = operand.get_component(measure.name).data_type
245
-
246
260
  result_data_type = cls.type_validation(operand_type)
247
261
  if is_mono_measure and operand_type.promotion_changed_type(result_data_type):
248
262
  component = Component(
249
263
  name=COMP_NAME_MAPPING[result_data_type],
250
264
  data_type=result_data_type,
251
265
  role=Role.MEASURE,
252
- nullable=measure.nullable
266
+ nullable=measure.nullable,
253
267
  )
254
268
  result_dataset.delete_component(measure.name)
255
269
  result_dataset.add_component(component)
@@ -261,30 +275,39 @@ class Between(Operator.Operator):
261
275
  measure.data_type = result_data_type
262
276
 
263
277
  @classmethod
264
- def validate(cls, operand: Union[Dataset, DataComponent, Scalar],
265
- from_: Union[DataComponent, Scalar],
266
- to: Union[DataComponent, Scalar]) -> Any:
278
+ def validate(
279
+ cls,
280
+ operand: Union[Dataset, DataComponent, Scalar],
281
+ from_: Union[DataComponent, Scalar],
282
+ to: Union[DataComponent, Scalar],
283
+ ) -> Any:
284
+ result: Union[Dataset, DataComponent, Scalar]
267
285
  if isinstance(operand, Dataset):
268
286
  if len(operand.get_measures()) == 0:
269
287
  raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
270
- result_components = {comp_name: copy(comp) for comp_name, comp in
271
- operand.components.items()
272
- if comp.role == Role.IDENTIFIER or comp.role == Role.MEASURE}
288
+ result_components = {
289
+ comp_name: copy(comp)
290
+ for comp_name, comp in operand.components.items()
291
+ if comp.role == Role.IDENTIFIER or comp.role == Role.MEASURE
292
+ }
273
293
  result = Dataset(name=operand.name, components=result_components, data=None)
274
294
  elif isinstance(operand, DataComponent):
275
- result = DataComponent(name=operand.name, data=None,
276
- data_type=cls.return_type, role=operand.role)
277
- elif isinstance(operand, Scalar) and isinstance(from_, Scalar) and isinstance(to, Scalar):
295
+ result = DataComponent(
296
+ name=operand.name, data=None, data_type=cls.return_type, role=operand.role
297
+ )
298
+ elif isinstance(from_, Scalar) and isinstance(to, Scalar):
278
299
  result = Scalar(name=operand.name, value=None, data_type=cls.return_type)
279
300
  else: # From or To is a DataComponent, or both
280
- result = DataComponent(name=operand.name, data=None,
281
- data_type=cls.return_type, role=Role.MEASURE)
301
+ result = DataComponent(
302
+ name=operand.name, data=None, data_type=cls.return_type, role=Role.MEASURE
303
+ )
282
304
 
283
305
  if isinstance(operand, Dataset):
284
306
  for measure in operand.get_measures():
285
307
  cls.validate_type_compatibility(measure.data_type, from_.data_type)
286
308
  cls.validate_type_compatibility(measure.data_type, to.data_type)
287
- cls.apply_return_type_dataset(result, operand)
309
+ if isinstance(result, Dataset):
310
+ cls.apply_return_type_dataset(result, operand)
288
311
  else:
289
312
  cls.validate_type_compatibility(operand.data_type, from_.data_type)
290
313
  cls.validate_type_compatibility(operand.data_type, to.data_type)
@@ -292,18 +315,20 @@ class Between(Operator.Operator):
292
315
  return result
293
316
 
294
317
  @classmethod
295
- def evaluate(cls, operand: Union[DataComponent, Scalar],
296
- from_: Union[DataComponent, Scalar],
297
- to: Union[DataComponent, Scalar]) -> Any:
318
+ def evaluate(
319
+ cls,
320
+ operand: Union[DataComponent, Scalar],
321
+ from_: Union[DataComponent, Scalar],
322
+ to: Union[DataComponent, Scalar],
323
+ ) -> Any:
298
324
  result = cls.validate(operand, from_, to)
299
-
300
325
  from_data = from_.data if isinstance(from_, DataComponent) else from_.value
301
326
  to_data = to.data if isinstance(to, DataComponent) else to.value
302
327
 
303
328
  if (
304
- isinstance(from_data, pd.Series) and
305
- isinstance(to_data, pd.Series) and
306
- len(from_data) != len(to_data)
329
+ isinstance(from_data, pd.Series)
330
+ and isinstance(to_data, pd.Series)
331
+ and len(from_data) != len(to_data)
307
332
  ):
308
333
  raise ValueError("From and To must have the same length")
309
334
 
@@ -311,38 +336,31 @@ class Between(Operator.Operator):
311
336
  result.data = operand.data.copy()
312
337
  for measure_name in operand.get_measures_names():
313
338
  result.data[measure_name] = cls.apply_operation_component(
314
- operand.data[measure_name],
315
- from_data, to_data
339
+ operand.data[measure_name], from_data, to_data
316
340
  )
317
341
  if len(result.get_measures()) == 1:
318
342
  result.data[COMP_NAME_MAPPING[cls.return_type]] = result.data[measure_name]
319
343
  result.data = result.data.drop(columns=[measure_name])
320
344
  result.data = result.data[result.get_components_names()]
321
345
  if isinstance(operand, DataComponent):
322
- result.data = cls.apply_operation_component(
323
- operand.data,
324
- from_data, to_data
325
- )
346
+ result.data = cls.apply_operation_component(operand.data, from_data, to_data)
326
347
  if isinstance(operand, Scalar) and isinstance(from_, Scalar) and isinstance(to, Scalar):
327
348
  if operand.value is None or from_data is None or to_data is None:
328
349
  result.value = None
329
350
  else:
330
351
  result.value = from_data <= operand.value <= to_data
331
- elif (
332
- isinstance(operand, Scalar) and
333
- (
334
- isinstance(from_data, pd.Series) or
335
- isinstance(to_data, pd.Series)
336
- )
352
+ elif isinstance(operand, Scalar) and (
353
+ isinstance(from_data, pd.Series) or isinstance(to_data, pd.Series)
337
354
  ): # From or To is a DataComponent, or both
355
+
338
356
  if isinstance(from_data, pd.Series):
339
357
  series = pd.Series(operand.value, index=from_data.index, dtype=object)
340
- else:
358
+ elif isinstance(to_data, pd.Series):
341
359
  series = pd.Series(operand.value, index=to_data.index, dtype=object)
342
360
  result_series = cls.apply_operation_component(series, from_data, to_data)
343
- result = DataComponent(name=operand.name, data=result_series, data_type=cls.return_type,
344
- role=Role.MEASURE)
345
-
361
+ result = DataComponent(
362
+ name=operand.name, data=result_series, data_type=cls.return_type, role=Role.MEASURE
363
+ )
346
364
  return result
347
365
 
348
366
 
@@ -352,12 +370,14 @@ class ExistIn(Operator.Operator):
352
370
  validate: Sets the identifiers and check if the left one exists in the right one.
353
371
  evaluate: Evaluates if the result data type is actually a boolean.
354
372
  """
373
+
355
374
  op = IN
356
375
 
357
376
  # noinspection PyTypeChecker
358
377
  @classmethod
359
- def validate(cls, dataset_1: Dataset, dataset_2: Dataset,
360
- retain_element: Optional[Boolean]) -> Any:
378
+ def validate(
379
+ cls, dataset_1: Dataset, dataset_2: Dataset, retain_element: Optional[Boolean]
380
+ ) -> Any:
361
381
  left_identifiers = dataset_1.get_identifiers_names()
362
382
  right_identifiers = dataset_2.get_identifiers_names()
363
383
 
@@ -368,17 +388,15 @@ class ExistIn(Operator.Operator):
368
388
 
369
389
  result_components = {comp.name: copy(comp) for comp in dataset_1.get_identifiers()}
370
390
  result_dataset = Dataset(name="result", components=result_components, data=None)
371
- result_dataset.add_component(Component(
372
- name='bool_var',
373
- data_type=Boolean,
374
- role=Role.MEASURE,
375
- nullable=False
376
- ))
391
+ result_dataset.add_component(
392
+ Component(name="bool_var", data_type=Boolean, role=Role.MEASURE, nullable=False)
393
+ )
377
394
  return result_dataset
378
395
 
379
396
  @classmethod
380
- def evaluate(cls, dataset_1: Dataset, dataset_2: Dataset,
381
- retain_element: Optional[Boolean]) -> Any:
397
+ def evaluate(
398
+ cls, dataset_1: Dataset, dataset_2: Dataset, retain_element: Optional[Boolean]
399
+ ) -> Any:
382
400
  result_dataset = cls.validate(dataset_1, dataset_2, retain_element)
383
401
 
384
402
  # Checking the subset
@@ -396,24 +414,36 @@ class ExistIn(Operator.Operator):
396
414
  common_columns = right_id_names
397
415
 
398
416
  # Check if the common identifiers are equal between the two datasets
399
- true_results = pd.merge(dataset_1.data, dataset_2.data, how='inner',
400
- left_on=common_columns,
401
- right_on=common_columns, copy=False)
402
- true_results = true_results[reference_identifiers_names]
417
+ if dataset_1.data is not None and dataset_2.data is not None:
418
+ true_results = pd.merge(
419
+ dataset_1.data,
420
+ dataset_2.data,
421
+ how="inner",
422
+ left_on=common_columns,
423
+ right_on=common_columns,
424
+ )
425
+ true_results = true_results[reference_identifiers_names]
426
+ else:
427
+ true_results = pd.DataFrame(columns=reference_identifiers_names)
403
428
 
404
429
  # Check for empty values
405
430
  if true_results.empty:
406
- true_results['bool_var'] = None
431
+ true_results["bool_var"] = None
407
432
  else:
408
- true_results['bool_var'] = True
409
-
410
- final_result = pd.merge(dataset_1.data, true_results, how='left',
411
- left_on=reference_identifiers_names,
412
- right_on=reference_identifiers_names, copy=False)
413
- final_result = final_result[reference_identifiers_names + ['bool_var']]
433
+ true_results["bool_var"] = True
434
+ if dataset_1.data is None:
435
+ dataset_1.data = pd.DataFrame(columns=reference_identifiers_names)
436
+ final_result = pd.merge(
437
+ dataset_1.data,
438
+ true_results,
439
+ how="left",
440
+ left_on=reference_identifiers_names,
441
+ right_on=reference_identifiers_names,
442
+ )
443
+ final_result = final_result[reference_identifiers_names + ["bool_var"]]
414
444
 
415
445
  # No null values are returned, only True or False
416
- final_result['bool_var'] = final_result['bool_var'].fillna(False)
446
+ final_result["bool_var"] = final_result["bool_var"].fillna(False)
417
447
 
418
448
  # Adding to the result dataset
419
449
  result_dataset.data = final_result
@@ -421,11 +451,12 @@ class ExistIn(Operator.Operator):
421
451
  # Retain only the elements that are specified (True or False)
422
452
  if retain_element is not None:
423
453
  result_dataset.data = result_dataset.data[
424
- result_dataset.data['bool_var'] == retain_element]
454
+ result_dataset.data["bool_var"] == retain_element
455
+ ]
425
456
  result_dataset.data = result_dataset.data.reset_index(drop=True)
426
457
 
427
458
  return result_dataset
428
459
 
429
460
  @staticmethod
430
- def _check_all_columns(row):
461
+ def _check_all_columns(row: Any) -> bool:
431
462
  return all(col_value == True for col_value in row)