vtlengine 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vtlengine might be problematic. See the package registry's advisory page for more details.

Files changed (54)
  1. vtlengine/API/_InternalApi.py +153 -100
  2. vtlengine/API/__init__.py +109 -67
  3. vtlengine/AST/ASTConstructor.py +188 -98
  4. vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
  5. vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
  6. vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
  7. vtlengine/AST/ASTEncoders.py +1 -1
  8. vtlengine/AST/ASTTemplate.py +8 -9
  9. vtlengine/AST/ASTVisitor.py +8 -12
  10. vtlengine/AST/DAG/__init__.py +43 -35
  11. vtlengine/AST/DAG/_words.py +4 -4
  12. vtlengine/AST/Grammar/lexer.py +732 -142
  13. vtlengine/AST/Grammar/parser.py +2188 -826
  14. vtlengine/AST/Grammar/tokens.py +128 -128
  15. vtlengine/AST/VtlVisitor.py +7 -4
  16. vtlengine/AST/__init__.py +22 -11
  17. vtlengine/DataTypes/NumericTypesHandling.py +5 -4
  18. vtlengine/DataTypes/TimeHandling.py +194 -301
  19. vtlengine/DataTypes/__init__.py +304 -218
  20. vtlengine/Exceptions/__init__.py +52 -27
  21. vtlengine/Exceptions/messages.py +134 -62
  22. vtlengine/Interpreter/__init__.py +781 -487
  23. vtlengine/Model/__init__.py +165 -121
  24. vtlengine/Operators/Aggregation.py +156 -95
  25. vtlengine/Operators/Analytic.py +115 -59
  26. vtlengine/Operators/Assignment.py +7 -4
  27. vtlengine/Operators/Boolean.py +27 -32
  28. vtlengine/Operators/CastOperator.py +177 -131
  29. vtlengine/Operators/Clause.py +137 -99
  30. vtlengine/Operators/Comparison.py +148 -117
  31. vtlengine/Operators/Conditional.py +149 -98
  32. vtlengine/Operators/General.py +68 -47
  33. vtlengine/Operators/HROperators.py +91 -72
  34. vtlengine/Operators/Join.py +217 -118
  35. vtlengine/Operators/Numeric.py +89 -44
  36. vtlengine/Operators/RoleSetter.py +16 -15
  37. vtlengine/Operators/Set.py +61 -36
  38. vtlengine/Operators/String.py +213 -139
  39. vtlengine/Operators/Time.py +334 -216
  40. vtlengine/Operators/Validation.py +117 -76
  41. vtlengine/Operators/__init__.py +340 -213
  42. vtlengine/Utils/__init__.py +195 -40
  43. vtlengine/__init__.py +1 -1
  44. vtlengine/files/output/__init__.py +15 -6
  45. vtlengine/files/output/_time_period_representation.py +10 -9
  46. vtlengine/files/parser/__init__.py +77 -52
  47. vtlengine/files/parser/_rfc_dialect.py +6 -5
  48. vtlengine/files/parser/_time_checking.py +46 -37
  49. vtlengine-1.0.1.dist-info/METADATA +236 -0
  50. vtlengine-1.0.1.dist-info/RECORD +58 -0
  51. {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
  52. vtlengine-1.0.dist-info/METADATA +0 -104
  53. vtlengine-1.0.dist-info/RECORD +0 -58
  54. {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
@@ -8,9 +8,23 @@ import pandas as pd
8
8
  from vtlengine.DataTypes import Integer, Number
9
9
  from vtlengine.Operators import ALL_MODEL_DATA_TYPES
10
10
 
11
- from vtlengine.AST.Grammar.tokens import ABS, CEIL, DIV, EXP, FLOOR, LN, LOG, MINUS, MOD, MULT, \
12
- PLUS, POWER, \
13
- ROUND, SQRT, TRUNC
11
+ from vtlengine.AST.Grammar.tokens import (
12
+ ABS,
13
+ CEIL,
14
+ DIV,
15
+ EXP,
16
+ FLOOR,
17
+ LN,
18
+ LOG,
19
+ MINUS,
20
+ MOD,
21
+ MULT,
22
+ PLUS,
23
+ POWER,
24
+ ROUND,
25
+ SQRT,
26
+ TRUNC,
27
+ )
14
28
  from vtlengine.Exceptions import SemanticError
15
29
  from vtlengine.Model import DataComponent, Dataset, Scalar
16
30
 
@@ -19,6 +33,7 @@ class Unary(Operator.Unary):
19
33
  """
20
34
  Checks that the unary operation is performed with a number.
21
35
  """
36
+
22
37
  type_to_check = Number
23
38
 
24
39
 
@@ -26,6 +41,7 @@ class Binary(Operator.Binary):
26
41
  """
27
42
  Checks that the binary operation is performed with numbers.
28
43
  """
44
+
29
45
  type_to_check = Number
30
46
 
31
47
  @classmethod
@@ -52,8 +68,9 @@ class Binary(Operator.Binary):
52
68
 
53
69
  class UnPlus(Unary):
54
70
  """
55
- `Plus <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=94&zoom=100,72,142> `_ unary operator
71
+ `Plus <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=94&zoom=100,72,142> `_ unary operator # noqa E501
56
72
  """
73
+
57
74
  op = PLUS
58
75
  py_op = operator.pos
59
76
 
@@ -64,24 +81,27 @@ class UnPlus(Unary):
64
81
 
65
82
  class UnMinus(Unary):
66
83
  """
67
- `Minus <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=95&zoom=100,72,414> `_unary operator
84
+ `Minus <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=95&zoom=100,72,414> `_unary operator # noqa E501
68
85
  """
86
+
69
87
  op = MINUS
70
88
  py_op = operator.neg
71
89
 
72
90
 
73
91
  class AbsoluteValue(Unary):
74
92
  """
75
- `Absolute <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=112&zoom=100,72,801> `_ unary operator
93
+ `Absolute <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=112&zoom=100,72,801> `_ unary operator # noqa E501
76
94
  """
95
+
77
96
  op = ABS
78
97
  py_op = operator.abs
79
98
 
80
99
 
81
100
  class Exponential(Unary):
82
101
  """
83
- `Exponential <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=114&zoom=100,72,94>`_ unary operator
102
+ `Exponential <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=114&zoom=100,72,94>`_ unary operator # noqa E501
84
103
  """
104
+
85
105
  op = EXP
86
106
  py_op = math.exp
87
107
  return_type = Number
@@ -89,9 +109,10 @@ class Exponential(Unary):
89
109
 
90
110
  class NaturalLogarithm(Unary):
91
111
  """
92
- `Natural logarithm <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=115&zoom=100,72,394> `_
112
+ `Natural logarithm <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=115&zoom=100,72,394> `_ # noqa E501
93
113
  unary operator
94
114
  """
115
+
95
116
  op = LN
96
117
  py_op = math.log
97
118
  return_type = Number
@@ -99,9 +120,10 @@ class NaturalLogarithm(Unary):
99
120
 
100
121
  class SquareRoot(Unary):
101
122
  """
102
- `Square Root <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=119&zoom=100,72,556> '_
123
+ `Square Root <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=119&zoom=100,72,556> '_ # noqa E501
103
124
  unary operator
104
125
  """
126
+
105
127
  op = SQRT
106
128
  py_op = math.sqrt
107
129
  return_type = Number
@@ -109,8 +131,9 @@ class SquareRoot(Unary):
109
131
 
110
132
  class Ceil(Unary):
111
133
  """
112
- `Ceilling <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=110&zoom=100,72,94> `_ unary operator
134
+ `Ceilling <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=110&zoom=100,72,94> `_ unary operator # noqa E501
113
135
  """
136
+
114
137
  op = CEIL
115
138
  py_op = math.ceil
116
139
  return_type = Integer
@@ -118,8 +141,9 @@ class Ceil(Unary):
118
141
 
119
142
  class Floor(Unary):
120
143
  """
121
- `Floor <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=111&zoom=100,72,442> `_ unary operator
144
+ `Floor <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=111&zoom=100,72,442> `_ unary operator # noqa E501
122
145
  """
146
+
123
147
  op = FLOOR
124
148
  py_op = math.floor
125
149
  return_type = Integer
@@ -127,8 +151,9 @@ class Floor(Unary):
127
151
 
128
152
  class BinPlus(Binary):
129
153
  """
130
- `Addition <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=96&zoom=100,72,692> `_ binary operator
154
+ `Addition <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=96&zoom=100,72,692> `_ binary operator # noqa E501
131
155
  """
156
+
132
157
  op = PLUS
133
158
  py_op = operator.add
134
159
  type_to_check = Number
@@ -136,8 +161,9 @@ class BinPlus(Binary):
136
161
 
137
162
  class BinMinus(Binary):
138
163
  """
139
- `Subtraction <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=98&zoom=100,72,448> `_ binary operator
164
+ `Subtraction <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=98&zoom=100,72,448> `_ binary operator # noqa E501
140
165
  """
166
+
141
167
  op = MINUS
142
168
  py_op = operator.sub
143
169
  type_to_check = Number
@@ -145,18 +171,20 @@ class BinMinus(Binary):
145
171
 
146
172
  class Mult(Binary):
147
173
  """
148
- `Multiplication <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=100&zoom=100,72,254>`_
174
+ `Multiplication <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=100&zoom=100,72,254>`_ # noqa E501
149
175
  binary operator
150
176
  """
177
+
151
178
  op = MULT
152
179
  py_op = operator.mul
153
180
 
154
181
 
155
182
  class Div(Binary):
156
183
  """
157
- `Division <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=102&zoom=100,72,94>`_
184
+ `Division <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=102&zoom=100,72,94>`_ # noqa E501
158
185
  binary operator
159
186
  """
187
+
160
188
  op = DIV
161
189
  py_op = operator.truediv
162
190
  return_type = Number
@@ -164,8 +192,9 @@ class Div(Binary):
164
192
 
165
193
  class Logarithm(Binary):
166
194
  """
167
- `Logarithm <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=118&zoom=100,72,228>`_ operator
195
+ `Logarithm <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=118&zoom=100,72,228>`_ operator # noqa E501
168
196
  """
197
+
169
198
  op = LOG
170
199
  return_type = Number
171
200
 
@@ -181,16 +210,18 @@ class Logarithm(Binary):
181
210
 
182
211
  class Modulo(Binary):
183
212
  """
184
- `Module <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=104&zoom=100,72,94>`_ operator
213
+ `Module <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=104&zoom=100,72,94>`_ operator # noqa E501
185
214
  """
215
+
186
216
  op = MOD
187
217
  py_op = operator.mod
188
218
 
189
219
 
190
220
  class Power(Binary):
191
221
  """
192
- `Power <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=116&zoom=100,72,693>`_ operator
222
+ `Power <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=116&zoom=100,72,693>`_ operator # noqa E501
193
223
  """
224
+
194
225
  op = POWER
195
226
  return_type = Number
196
227
 
@@ -198,26 +229,30 @@ class Power(Binary):
198
229
  def py_op(cls, x: Any, param: Any) -> Any:
199
230
  if pd.isnull(param):
200
231
  return None
201
- return x ** param
232
+ return x**param
202
233
 
203
234
 
204
235
  class Parameterized(Unary):
205
236
  """Parametrized class
206
- Inherits from Unary class, to validate the data type and evaluate if it is the correct one to
207
- perform the operation. Similar to Unary, but in the end, the param validation is added.
237
+ Inherits from Unary class, to validate the data type and evaluate if it is the correct one to
238
+ perform the operation. Similar to Unary, but in the end, the param validation is added.
208
239
  """
209
240
 
210
241
  @classmethod
211
- def validate(cls, operand: Operator.ALL_MODEL_DATA_TYPES,
212
- param: Optional[Union[DataComponent, Scalar]] = None):
242
+ def validate(
243
+ cls,
244
+ operand: Operator.ALL_MODEL_DATA_TYPES,
245
+ param: Optional[Union[DataComponent, Scalar]] = None,
246
+ ) -> Any:
213
247
 
214
248
  if param is not None:
215
249
  if isinstance(param, Dataset):
216
250
  raise SemanticError("1-1-15-8", op=cls.op, comp_type="Dataset")
217
251
  if isinstance(param, DataComponent):
218
252
  if isinstance(operand, Scalar):
219
- raise SemanticError("1-1-15-8", op=cls.op,
220
- comp_type="DataComponent and an Scalar operand")
253
+ raise SemanticError(
254
+ "1-1-15-8", op=cls.op, comp_type="DataComponent and an Scalar operand"
255
+ )
221
256
  cls.validate_type_compatibility(param.data_type)
222
257
  else:
223
258
  cls.validate_scalar_type(param)
@@ -233,17 +268,19 @@ class Parameterized(Unary):
233
268
  return None if pd.isnull(x) else cls.py_op(x, param)
234
269
 
235
270
  @classmethod
236
- def apply_operation_two_series(cls, left_series: pd.Series, right_series: pd.Series) -> Any:
271
+ def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
237
272
  return left_series.combine(right_series, cls.op_func)
238
273
 
239
274
  @classmethod
240
- def apply_operation_series_scalar(cls, series: pd.Series, param: Any) -> Any:
275
+ def apply_operation_series_scalar(cls, series: Any, param: Any) -> Any:
241
276
  return series.map(lambda x: cls.op_func(x, param))
242
277
 
243
278
  @classmethod
244
- def dataset_evaluation(cls, operand: Dataset, param: Union[DataComponent, Scalar]):
279
+ def dataset_evaluation(
280
+ cls, operand: Dataset, param: Optional[Union[DataComponent, Scalar]] = None
281
+ ) -> Dataset:
245
282
  result = cls.validate(operand, param)
246
- result.data = operand.data.copy()
283
+ result.data = operand.data.copy() if operand.data is not None else pd.DataFrame()
247
284
  for measure_name in result.get_measures_names():
248
285
  try:
249
286
  if isinstance(param, DataComponent):
@@ -251,49 +288,56 @@ class Parameterized(Unary):
251
288
  result.data[measure_name], param.data
252
289
  )
253
290
  else:
254
- param_value = None if param is None else param.value
291
+ param_value = param.value if param is not None else None
255
292
  result.data[measure_name] = cls.apply_operation_series_scalar(
256
293
  result.data[measure_name], param_value
257
294
  )
258
295
  except ValueError:
259
- raise SemanticError("2-1-15-1", op=cls.op, comp_name=measure_name,
260
- dataset_name=operand.name) from None
296
+ raise SemanticError(
297
+ "2-1-15-1", op=cls.op, comp_name=measure_name, dataset_name=operand.name
298
+ ) from None
261
299
  result.data = result.data[result.get_components_names()]
262
300
  return result
263
301
 
264
302
  @classmethod
265
- def component_evaluation(cls, operand: DataComponent, param: Union[DataComponent, Scalar]):
303
+ def component_evaluation(
304
+ cls, operand: DataComponent, param: Optional[Union[DataComponent, Scalar]] = None
305
+ ) -> DataComponent:
266
306
  result = cls.validate(operand, param)
307
+ if operand.data is None:
308
+ operand.data = pd.Series()
267
309
  result.data = operand.data.copy()
268
310
  if isinstance(param, DataComponent):
269
311
  result.data = cls.apply_operation_two_series(operand.data, param.data)
270
312
  else:
271
- param_value = None if param is None else param.value
313
+ param_value = param.value if param is not None else None
272
314
  result.data = cls.apply_operation_series_scalar(operand.data, param_value)
273
315
  return result
274
316
 
275
317
  @classmethod
276
- def scalar_evaluation(cls, operand: Scalar, param: Scalar):
318
+ def scalar_evaluation(cls, operand: Scalar, param: Optional[Any] = None) -> Scalar:
277
319
  result = cls.validate(operand, param)
278
- param_value = None if param is None else param.value
320
+ param_value = param.value if param is not None else None
279
321
  result.value = cls.op_func(operand.value, param_value)
280
322
  return result
281
323
 
282
324
  @classmethod
283
- def evaluate(cls, operand: ALL_MODEL_DATA_TYPES,
284
- param: Optional[Union[DataComponent, Scalar]] = None) -> ALL_MODEL_DATA_TYPES:
325
+ def evaluate(
326
+ cls, operand: ALL_MODEL_DATA_TYPES, param: Optional[Union[DataComponent, Scalar]] = None
327
+ ) -> Union[DataComponent, Dataset, Scalar]:
285
328
  if isinstance(operand, Dataset):
286
329
  return cls.dataset_evaluation(operand, param)
287
- if isinstance(operand, DataComponent):
330
+ elif isinstance(operand, DataComponent):
288
331
  return cls.component_evaluation(operand, param)
289
- if isinstance(operand, Scalar):
332
+ else:
290
333
  return cls.scalar_evaluation(operand, param)
291
334
 
292
335
 
293
336
  class Round(Parameterized):
294
337
  """
295
- `Round <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=106&zoom=100,72,94>`_ operator
338
+ `Round <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=106&zoom=100,72,94>`_ operator # noqa E501
296
339
  """
340
+
297
341
  op = ROUND
298
342
  return_type = Integer
299
343
 
@@ -301,7 +345,7 @@ class Round(Parameterized):
301
345
  def py_op(cls, x: Any, param: Any) -> Any:
302
346
  multiplier = 1.0
303
347
  if not pd.isnull(param):
304
- multiplier = 10 ** param
348
+ multiplier = 10**param
305
349
 
306
350
  if x >= 0.0:
307
351
  rounded_value = math.floor(x * multiplier + 0.5) / multiplier
@@ -316,15 +360,16 @@ class Round(Parameterized):
316
360
 
317
361
  class Trunc(Parameterized):
318
362
  """
319
- `Trunc <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=108&zoom=100,72,94>`_ operator.
363
+ `Trunc <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=108&zoom=100,72,94>`_ operator. # noqa E501
320
364
  """
365
+
321
366
  op = TRUNC
322
367
 
323
368
  @classmethod
324
369
  def py_op(cls, x: float, param: Optional[float]) -> Any:
325
370
  multiplier = 1.0
326
371
  if not pd.isnull(param):
327
- multiplier = 10 ** param
372
+ multiplier = 10**param
328
373
 
329
374
  truncated_value = int(x * multiplier) / multiplier
330
375
 
@@ -1,43 +1,42 @@
1
- import os
2
1
  from copy import copy
2
+ from typing import Any, Union
3
3
 
4
4
  from vtlengine.Exceptions import SemanticError
5
5
 
6
- if os.environ.get("SPARK", False):
7
- import pyspark.pandas as pd
8
- else:
9
- import pandas as pd
6
+ # if os.environ.get("SPARK", False):
7
+ # import pyspark.pandas as pd
8
+ # else:
9
+ # import pandas as pd
10
+ import pandas as pd
10
11
 
11
12
  from vtlengine.Model import DataComponent, Role, Scalar
12
13
  from vtlengine.Operators import Unary
13
14
 
14
- ALLOWED_MODEL_TYPES = [DataComponent, Scalar]
15
+ ALLOWED_MODEL_TYPES = Union[DataComponent, Scalar]
15
16
 
16
17
 
17
18
  class RoleSetter(Unary):
18
- role = None
19
+ role: Role
19
20
 
20
21
  @classmethod
21
- def validate(cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0):
22
+ def validate(cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0) -> DataComponent:
22
23
  if isinstance(operand, Scalar):
23
-
24
24
  nullable = True
25
25
  if cls.role == Role.IDENTIFIER or operand.value is not None:
26
26
  nullable = False
27
-
28
27
  return DataComponent(
29
28
  name=operand.name,
30
29
  data_type=operand.data_type,
31
30
  role=cls.role,
32
31
  nullable=nullable,
33
- data=None
32
+ data=None,
34
33
  )
35
34
  operand.role = cls.role
36
35
  return copy(operand)
37
36
 
38
37
  @classmethod
39
- def evaluate(cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0):
40
- if isinstance(operand, DataComponent):
38
+ def evaluate(cls, operand: Any, data_size: int = 0) -> DataComponent:
39
+ if isinstance(operand, DataComponent) and operand.data is not None:
41
40
  if not operand.nullable and any(operand.data.isnull()):
42
41
  raise SemanticError("1-1-1-16")
43
42
  result = cls.validate(operand, data_size)
@@ -52,14 +51,16 @@ class Identifier(RoleSetter):
52
51
  role = Role.IDENTIFIER
53
52
 
54
53
  @classmethod
55
- def validate(cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0):
54
+ def validate(cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0) -> DataComponent:
56
55
  result = super().validate(operand)
57
56
  if result.nullable:
58
57
  raise SemanticError("1-1-1-16")
59
58
  return result
60
59
 
61
60
  @classmethod
62
- def evaluate(cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0):
61
+ def evaluate( # type: ignore[override]
62
+ cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0
63
+ ) -> DataComponent:
63
64
  if isinstance(operand, Scalar):
64
65
  if operand.value is None:
65
66
  raise SemanticError("1-1-1-16")
@@ -1,12 +1,12 @@
1
- import os
2
- from typing import List
1
+ from typing import List, Any, Dict
3
2
 
4
3
  from vtlengine.Exceptions import SemanticError
5
4
 
6
- if os.environ.get("SPARK"):
7
- import pyspark.pandas as pd
8
- else:
9
- import pandas as pd
5
+ # if os.environ.get("SPARK"):
6
+ # import pyspark.pandas as pd
7
+ # else:
8
+ # import pandas as pd
9
+ import pandas as pd
10
10
 
11
11
  from vtlengine.Model import Dataset
12
12
  from vtlengine.Operators import Operator
@@ -18,18 +18,22 @@ class Set(Operator):
18
18
  @classmethod
19
19
  def check_same_structure(cls, dataset_1: Dataset, dataset_2: Dataset) -> None:
20
20
  if len(dataset_1.components) != len(dataset_2.components):
21
- raise SemanticError("1-1-17-1", op=cls.op, dataset_1=dataset_1.name,
22
- dataset_2=dataset_2.name)
21
+ raise SemanticError(
22
+ "1-1-17-1", op=cls.op, dataset_1=dataset_1.name, dataset_2=dataset_2.name
23
+ )
23
24
 
24
25
  for comp in dataset_1.components.values():
25
26
  if comp.name not in dataset_2.components:
26
27
  raise Exception(f"Component {comp.name} not found in dataset {dataset_2.name}")
27
28
  second_comp = dataset_2.components[comp.name]
28
- binary_implicit_promotion(comp.data_type, second_comp.data_type, cls.type_to_check,
29
- cls.return_type)
29
+ binary_implicit_promotion(
30
+ comp.data_type, second_comp.data_type, cls.type_to_check, cls.return_type
31
+ )
30
32
  if comp.role != second_comp.role:
31
- raise Exception(f"Component {comp.name} has different roles "
32
- f"in datasets {dataset_1.name} and {dataset_2.name}")
33
+ raise Exception(
34
+ f"Component {comp.name} has different roles "
35
+ f"in datasets {dataset_1.name} and {dataset_2.name}"
36
+ )
33
37
 
34
38
  @classmethod
35
39
  def validate(cls, operands: List[Dataset]) -> Dataset:
@@ -38,7 +42,7 @@ class Set(Operator):
38
42
  for operand in operands[1:]:
39
43
  cls.check_same_structure(base_operand, operand)
40
44
 
41
- result_components = {}
45
+ result_components: Dict[str, Any] = {}
42
46
  for operand in operands:
43
47
  if len(result_components) == 0:
44
48
  result_components = operand.components
@@ -46,7 +50,8 @@ class Set(Operator):
46
50
  for comp_name, comp in operand.components.items():
47
51
  current_comp = result_components[comp_name]
48
52
  result_components[comp_name].data_type = binary_implicit_promotion(
49
- current_comp.data_type, comp.data_type)
53
+ current_comp.data_type, comp.data_type
54
+ )
50
55
  result_components[comp_name].nullable = current_comp.nullable or comp.nullable
51
56
 
52
57
  result = Dataset(name="result", components=result_components, data=None)
@@ -58,10 +63,9 @@ class Union(Set):
58
63
  def evaluate(cls, operands: List[Dataset]) -> Dataset:
59
64
  result = cls.validate(operands)
60
65
  all_datapoints = [ds.data for ds in operands]
61
- result.data = pd.concat(all_datapoints, sort=True,
62
- ignore_index=True)
66
+ result.data = pd.concat(all_datapoints, sort=True, ignore_index=True)
63
67
  identifiers_names = result.get_identifiers_names()
64
- result.data = result.data.drop_duplicates(subset=identifiers_names, keep='first')
68
+ result.data = result.data.drop_duplicates(subset=identifiers_names, keep="first")
65
69
  result.data.reset_index(drop=True, inplace=True)
66
70
  return result
67
71
 
@@ -76,16 +80,22 @@ class Intersection(Set):
76
80
  if result.data is None:
77
81
  result.data = data
78
82
  else:
79
- result.data = result.data.merge(data, how='inner',
80
- on=result.get_identifiers_names())
83
+ if data is None:
84
+ result.data = pd.DataFrame(columns=result.get_identifiers_names())
85
+ break
86
+ result.data = result.data.merge(
87
+ data, how="inner", on=result.get_identifiers_names()
88
+ )
81
89
 
82
- not_identifiers = [col for col in result.get_measures_names() +
83
- result.get_attributes_names()]
90
+ not_identifiers = [
91
+ col for col in result.get_measures_names() + result.get_attributes_names()
92
+ ]
84
93
 
85
94
  for col in not_identifiers:
86
95
  result.data[col] = result.data[col + "_x"]
87
96
  result.data = result.data[result.get_identifiers_names() + not_identifiers]
88
- result.data.reset_index(drop=True, inplace=True)
97
+ if result.data is not None:
98
+ result.data.reset_index(drop=True, inplace=True)
89
99
  return result
90
100
 
91
101
 
@@ -96,35 +106,46 @@ class Symdiff(Set):
96
106
  result = cls.validate(operands)
97
107
  all_datapoints = [ds.data for ds in operands]
98
108
  for data in all_datapoints:
109
+ if data is None:
110
+ data = pd.DataFrame(columns=result.get_identifiers_names())
99
111
  if result.data is None:
100
112
  result.data = data
101
113
  else:
102
114
  # Realiza la operación equivalente en pyspark.pandas
103
- result.data = result.data.merge(data, how='outer',
104
- on=result.get_identifiers_names(),
105
- suffixes=('_x', '_y'))
115
+ result.data = result.data.merge(
116
+ data, how="outer", on=result.get_identifiers_names(), suffixes=("_x", "_y")
117
+ )
106
118
 
107
119
  for measure in result.get_measures_names():
108
- result.data['_merge'] = result.data.apply(
109
- lambda row: 'left_only' if pd.isnull(row[measure + '_y']) else (
110
- 'right_only' if pd.isnull(row[measure + '_x']) else 'both'),
111
- axis=1
120
+ result.data["_merge"] = result.data.apply(
121
+ lambda row: (
122
+ "left_only"
123
+ if pd.isnull(row[measure + "_y"])
124
+ else ("right_only" if pd.isnull(row[measure + "_x"]) else "both")
125
+ ),
126
+ axis=1,
112
127
  )
113
128
 
114
129
  not_identifiers = result.get_measures_names() + result.get_attributes_names()
115
130
  for col in not_identifiers:
116
131
  result.data[col] = result.data.apply(
117
- lambda x, c=col: x[c + '_x'] if x['_merge'] == 'left_only' else (
118
- x[c + '_y'] if x['_merge'] == 'right_only' else None), axis=1)
132
+ lambda x, c=col: (
133
+ x[c + "_x"]
134
+ if x["_merge"] == "left_only"
135
+ else (x[c + "_y"] if x["_merge"] == "right_only" else None)
136
+ ),
137
+ axis=1,
138
+ )
119
139
  result.data = result.data[result.get_identifiers_names() + not_identifiers].dropna()
120
- result.data = result.data.reset_index(drop=True)
140
+ if result.data is not None:
141
+ result.data = result.data.reset_index(drop=True)
121
142
  return result
122
143
 
123
144
 
124
145
  class Setdiff(Set):
125
146
 
126
147
  @staticmethod
127
- def has_null(row):
148
+ def has_null(row: Any) -> bool:
128
149
  return row.isnull().any()
129
150
 
130
151
  @classmethod
@@ -135,12 +156,15 @@ class Setdiff(Set):
135
156
  if result.data is None:
136
157
  result.data = data
137
158
  else:
159
+ if data is None:
160
+ data = pd.DataFrame(columns=result.get_identifiers_names())
138
161
  result.data = result.data.merge(data, how="left", on=result.get_identifiers_names())
139
162
  if len(result.data) > 0:
140
163
  result.data = result.data[result.data.apply(cls.has_null, axis=1)]
141
164
 
142
- not_identifiers = [col for col in result.get_measures_names() +
143
- result.get_attributes_names()]
165
+ not_identifiers = [
166
+ col for col in result.get_measures_names() + result.get_attributes_names()
167
+ ]
144
168
  for col in not_identifiers:
145
169
  if col + "_x" in result.data:
146
170
  result.data[col] = result.data[col + "_x"]
@@ -148,5 +172,6 @@ class Setdiff(Set):
148
172
  if col + "_y" in result.data:
149
173
  del result.data[col + "_y"]
150
174
  result.data = result.data[result.get_identifiers_names() + not_identifiers]
151
- result.data.reset_index(drop=True, inplace=True)
175
+ if result.data is not None:
176
+ result.data.reset_index(drop=True, inplace=True)
152
177
  return result