vtlengine 1.4.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. vtlengine/API/_InternalApi.py +791 -0
  2. vtlengine/API/__init__.py +612 -0
  3. vtlengine/API/data/schema/external_routines_schema.json +34 -0
  4. vtlengine/API/data/schema/json_schema_2.1.json +116 -0
  5. vtlengine/API/data/schema/value_domain_schema.json +97 -0
  6. vtlengine/AST/ASTComment.py +57 -0
  7. vtlengine/AST/ASTConstructor.py +598 -0
  8. vtlengine/AST/ASTConstructorModules/Expr.py +1928 -0
  9. vtlengine/AST/ASTConstructorModules/ExprComponents.py +995 -0
  10. vtlengine/AST/ASTConstructorModules/Terminals.py +790 -0
  11. vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
  12. vtlengine/AST/ASTDataExchange.py +10 -0
  13. vtlengine/AST/ASTEncoders.py +32 -0
  14. vtlengine/AST/ASTString.py +675 -0
  15. vtlengine/AST/ASTTemplate.py +558 -0
  16. vtlengine/AST/ASTVisitor.py +25 -0
  17. vtlengine/AST/DAG/__init__.py +479 -0
  18. vtlengine/AST/DAG/_words.py +10 -0
  19. vtlengine/AST/Grammar/Vtl.g4 +705 -0
  20. vtlengine/AST/Grammar/VtlTokens.g4 +409 -0
  21. vtlengine/AST/Grammar/__init__.py +0 -0
  22. vtlengine/AST/Grammar/lexer.py +2139 -0
  23. vtlengine/AST/Grammar/parser.py +16597 -0
  24. vtlengine/AST/Grammar/tokens.py +169 -0
  25. vtlengine/AST/VtlVisitor.py +824 -0
  26. vtlengine/AST/__init__.py +674 -0
  27. vtlengine/DataTypes/TimeHandling.py +562 -0
  28. vtlengine/DataTypes/__init__.py +863 -0
  29. vtlengine/DataTypes/_time_checking.py +135 -0
  30. vtlengine/Exceptions/__exception_file_generator.py +96 -0
  31. vtlengine/Exceptions/__init__.py +159 -0
  32. vtlengine/Exceptions/messages.py +1004 -0
  33. vtlengine/Interpreter/__init__.py +2048 -0
  34. vtlengine/Model/__init__.py +501 -0
  35. vtlengine/Operators/Aggregation.py +357 -0
  36. vtlengine/Operators/Analytic.py +455 -0
  37. vtlengine/Operators/Assignment.py +23 -0
  38. vtlengine/Operators/Boolean.py +106 -0
  39. vtlengine/Operators/CastOperator.py +451 -0
  40. vtlengine/Operators/Clause.py +366 -0
  41. vtlengine/Operators/Comparison.py +488 -0
  42. vtlengine/Operators/Conditional.py +495 -0
  43. vtlengine/Operators/General.py +191 -0
  44. vtlengine/Operators/HROperators.py +254 -0
  45. vtlengine/Operators/Join.py +447 -0
  46. vtlengine/Operators/Numeric.py +422 -0
  47. vtlengine/Operators/RoleSetter.py +77 -0
  48. vtlengine/Operators/Set.py +176 -0
  49. vtlengine/Operators/String.py +578 -0
  50. vtlengine/Operators/Time.py +1144 -0
  51. vtlengine/Operators/Validation.py +275 -0
  52. vtlengine/Operators/__init__.py +900 -0
  53. vtlengine/Utils/__Virtual_Assets.py +34 -0
  54. vtlengine/Utils/__init__.py +479 -0
  55. vtlengine/__extras_check.py +17 -0
  56. vtlengine/__init__.py +27 -0
  57. vtlengine/files/__init__.py +0 -0
  58. vtlengine/files/output/__init__.py +35 -0
  59. vtlengine/files/output/_time_period_representation.py +55 -0
  60. vtlengine/files/parser/__init__.py +240 -0
  61. vtlengine/files/parser/_rfc_dialect.py +22 -0
  62. vtlengine/py.typed +0 -0
  63. vtlengine-1.4.0rc2.dist-info/METADATA +89 -0
  64. vtlengine-1.4.0rc2.dist-info/RECORD +66 -0
  65. vtlengine-1.4.0rc2.dist-info/WHEEL +4 -0
  66. vtlengine-1.4.0rc2.dist-info/licenses/LICENSE.md +661 -0
@@ -0,0 +1,900 @@
+ import os
+ from copy import copy
+ from typing import Any, Optional, Union
+
+ # if os.environ.get("SPARK", False):
+ #     import pyspark.pandas as pd
+ # else:
+ #     import pandas as pd
+ import pandas as pd
+
+ from vtlengine.AST.Grammar.tokens import (
+     AND,
+     CEIL,
+     EQ,
+     FLOOR,
+     GT,
+     GTE,
+     LT,
+     LTE,
+     NEQ,
+     OR,
+     ROUND,
+     XOR,
+ )
+ from vtlengine.DataTypes import (
+     COMP_NAME_MAPPING,
+     SCALAR_TYPES_CLASS_REVERSE,
+     binary_implicit_promotion,
+     check_binary_implicit_promotion,
+     check_unary_implicit_promotion,
+     unary_implicit_promotion,
+ )
+ from vtlengine.DataTypes.TimeHandling import (
+     PERIOD_IND_MAPPING,
+     TimeIntervalHandler,
+     TimePeriodHandler,
+ )
+ from vtlengine.Exceptions import SemanticError
+ from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
+ from vtlengine.Utils.__Virtual_Assets import VirtualCounter
+
+ ALL_MODEL_DATA_TYPES = Union[Dataset, Scalar, DataComponent]
+
+ # This allows changing the data type of the Measure in the result Data Set
+ # when the operator is applied to mono-measure Data Sets.
+ # TODO: Check if there are more operators that allow this
+ MONOMEASURE_CHANGED_ALLOWED = [CEIL, FLOOR, ROUND]
+ BINARY_COMPARISON_OPERATORS = [EQ, NEQ, GT, GTE, LT, LTE]
+ BINARY_BOOLEAN_OPERATORS = [AND, OR, XOR]
+
+ only_semantic = False
+
+
+ class Operator:
+     """Superclass for all operators"""
+
+     op: Any = None
+     py_op: Any = None
+     spark_op: Any = None
+     type_to_check: Any = None
+     return_type: Any = None
+
+     @classmethod
+     def analyze(cls, *args: Any, **kwargs: Any) -> Any:
+         if only_semantic:
+             return cls.validate(*args, **kwargs)
+         return cls.evaluate(*args, **kwargs)
+
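# A minimal sketch of the analyze() entry point: the module-level `only_semantic`
# flag turns every analyze() call into a validation-only run (structure and types,
# no data). ToyAbs is a hypothetical subclass defined here for illustration;
# `Integer` is assumed to be one of the scalar type classes exposed by
# vtlengine.DataTypes.
from vtlengine.DataTypes import Integer
from vtlengine.Model import Scalar
import vtlengine.Operators as Operators

class ToyAbs(Operators.Unary):
    op = "abs"
    py_op = abs

x = Scalar(name="x", data_type=Integer, value=-5)
print(ToyAbs.analyze(x).value)   # 5: default mode evaluates

Operators.only_semantic = True   # semantic-only mode: analyze == validate
print(ToyAbs.analyze(x).value)   # None: only the result structure is built
Operators.only_semantic = False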
+     @classmethod
+     def cast_time_types(cls, data_type: Any, series: Any) -> Any:
+         if cls.op not in BINARY_COMPARISON_OPERATORS:
+             return series
+         if data_type.__name__ == "TimeInterval":
+             series = series.map(
+                 lambda x: TimeIntervalHandler.from_iso_format(x), na_action="ignore"
+             )
+         elif data_type.__name__ == "TimePeriod":
+             series = series.map(lambda x: TimePeriodHandler(x), na_action="ignore")
+         elif data_type.__name__ == "Duration":
+             series = series.map(lambda x: PERIOD_IND_MAPPING[x], na_action="ignore")
+         return series
+
+     @classmethod
+     def cast_time_types_scalar(cls, data_type: Any, value: str) -> Any:
+         if cls.op not in BINARY_COMPARISON_OPERATORS:
+             return value
+         if value is None:
+             return None
+         if data_type.__name__ == "TimeInterval":
+             return TimeIntervalHandler.from_iso_format(value)
+         elif data_type.__name__ == "TimePeriod":
+             return TimePeriodHandler(value)
+         elif data_type.__name__ == "Duration":
+             if value not in PERIOD_IND_MAPPING:
+                 raise Exception(f"Duration {value} is not valid")
+             return PERIOD_IND_MAPPING[value]
+         return value
+
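# Sketch of the comparison-only casting rule above: cast_time_types_scalar() is a
# no-op unless cls.op is one of the comparison tokens, and even then only the three
# time-related types are rewrapped (TimeIntervalHandler / TimePeriodHandler /
# PERIOD_IND_MAPPING) so that comparisons use the right ordering. ToyEq and
# ToyPlus are hypothetical subclasses used for illustration.
from vtlengine.AST.Grammar.tokens import EQ
import vtlengine.Operators as Operators

class ToyEq(Operators.Binary):
    op = EQ

class ToyPlus(Operators.Binary):
    op = "+"

StringType = type("String", (), {})  # stand-in type class; only __name__ is inspected

# Non-comparison operators pass every value through untouched...
assert ToyPlus.cast_time_types_scalar(StringType, "2020-Q1") == "2020-Q1"
# ...and comparison operators only rewrap TimeInterval/TimePeriod/Duration values,
# so a String-typed value also comes back unchanged.
assert ToyEq.cast_time_types_scalar(StringType, "2020-Q1") == "2020-Q1"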
+     @classmethod
+     def modify_measure_column(cls, result: Dataset) -> None:
+         """
+         If an Operator changes the data type of the Variable it is applied to (e.g., from
+         string to number), the result Data Set cannot keep that Variable, because a Variable
+         cannot have different data types in different Data Sets. As a consequence, the
+         converted Variable cannot follow the rules described in the sections above and must
+         be replaced, in the result Data Set, by another Variable of the proper data type.
+         For the sake of simplicity, operators that change the data type are allowed only on
+         mono-measure operand Data Sets, so that the conversion happens on just one Measure.
+         A generic Measure, depending on the data type of the result, is assigned by default
+         to the result Data Set.
+
+         Used by the evaluate methods when a Dataset is involved.
+         """
+
+         if len(result.get_measures()) == 1 and cls.return_type is not None and result is not None:
+             measure_name = result.get_measures_names()[0]
+             components = list(result.components.keys())
+             columns = list(result.data.columns) if result.data is not None else []
+             for column in columns:
+                 if column not in set(components) and result.data is not None:
+                     result.data[measure_name] = result.data[column]
+                     del result.data[column]
+
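# Hedged sketch of modify_measure_column(): after an evaluation, any data column
# that is not a declared component is folded into the single declared Measure.
# Component/Dataset/Role are used with the signatures visible in this module;
# `Number` is assumed to be a scalar type class exposed by vtlengine.DataTypes.
import pandas as pd
from vtlengine.DataTypes import Number
from vtlengine.Model import Component, Dataset, Role
import vtlengine.Operators as Operators

class ToyOp(Operators.Operator):
    return_type = Number  # modify_measure_column() only acts when this is set

ds = Dataset(
    name="DS_r",
    components={
        "Id_1": Component(name="Id_1", data_type=Number, role=Role.IDENTIFIER, nullable=False),
        "num_var": Component(name="num_var", data_type=Number, role=Role.MEASURE, nullable=True),
    },
    data=None,
)
ds.data = pd.DataFrame({"Id_1": [1, 2], "stray": [1.5, 2.5]})

ToyOp.modify_measure_column(ds)
# ds.data now has exactly the declared columns: "stray" was folded into "num_var".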
+     @classmethod
+     def validate_dataset_type(cls, *args: Any) -> None:
+         raise Exception("Method should be implemented by inheritors")
+
+     @classmethod
+     def validate_component_type(cls, *args: Any) -> None:
+         raise Exception("Method should be implemented by inheritors")
+
+     @classmethod
+     def validate_scalar_type(cls, *args: Any) -> None:
+         raise Exception("Method should be implemented by inheritors")
+
+     @classmethod
+     def validate(cls, *args: Any, **kwargs: Any) -> Any:
+         raise Exception("Method should be implemented by inheritors")
+
+     @classmethod
+     def evaluate(cls, *args: Any, **kwargs: Any) -> Any:
+         raise Exception("Method should be implemented by inheritors")
+
+     @classmethod
+     def scalar_validation(cls, *args: Any) -> Any:
+         raise Exception("Method should be implemented by inheritors")
+
+     @classmethod
+     def component_validation(cls, *args: Any) -> Any:
+         raise Exception("Method should be implemented by inheritors")
+
+     @classmethod
+     def validate_type_compatibility(cls, *args: Any) -> bool:
+         if len(args) == 1:
+             operand = args[0]
+             return check_unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
+         if len(args) == 2:
+             left, right = args
+             return check_binary_implicit_promotion(left, right, cls.type_to_check, cls.return_type)
+         raise Exception("Method should be implemented by inheritors")
+
+     @classmethod
+     def type_validation(cls, *args: Any) -> Any:
+         if len(args) == 1:
+             operand = args[0]
+             return unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
+         if len(args) == 2:
+             left, right = args
+             return binary_implicit_promotion(left, right, cls.type_to_check, cls.return_type)
+         raise Exception("Method should be implemented by inheritors")
+
+     @classmethod
+     def apply_return_type_dataset(cls, *args: Any) -> None:
+         raise Exception("Method should be implemented by inheritors")
+
+     @classmethod
+     def apply_return_type(cls, *args: Any) -> None:
+         raise Exception("Method should be implemented by inheritors")
+
+
+ def _id_type_promotion_join_keys(
+     c_left: Component,
+     c_right: Component,
+     join_key: str,
+     left_data: Optional[pd.DataFrame] = None,
+     right_data: Optional[pd.DataFrame] = None,
+ ) -> None:
+     if left_data is None:
+         left_data = pd.DataFrame()
+     if right_data is None:
+         right_data = pd.DataFrame()
+
+     left_type_name: str = str(c_left.data_type.__name__)
+     right_type_name: str = str(c_right.data_type.__name__)
+
+     if left_type_name == right_type_name or len(left_data) == 0 or len(right_data) == 0:
+         left_data[join_key] = left_data[join_key].astype(object)
+         right_data[join_key] = right_data[join_key].astype(object)
+         return
+     if (left_type_name == "Integer" and right_type_name == "Number") or (
+         left_type_name == "Number" and right_type_name == "Integer"
+     ):
+         left_data[join_key] = left_data[join_key].map(lambda x: int(float(x)))
+         right_data[join_key] = right_data[join_key].map(lambda x: int(float(x)))
+     elif left_type_name == "String" and right_type_name in ("Integer", "Number"):
+         left_data[join_key] = left_data[join_key].map(lambda x: _handle_str_number(x))
+     elif left_type_name in ("Integer", "Number") and right_type_name == "String":
+         right_data[join_key] = right_data[join_key].map(lambda x: _handle_str_number(x))
+     left_data[join_key] = left_data[join_key].astype(object)
+     right_data[join_key] = right_data[join_key].astype(object)
+
+
+ def _handle_str_number(x: Union[str, int, float]) -> Union[str, int, float]:
+     if isinstance(x, int):
+         return x
+     try:
+         x = float(x)
+         if x.is_integer():
+             return int(x)
+         return x
+     except ValueError:  # Not a numeric string: return the value unchanged; it will simply never match
+         return x
+
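# Worked examples for the join-key normalisation above: string keys are coerced
# towards numbers where possible, so that "3", "3.0" and 3 merge as equal keys,
# while non-numeric strings fall through the ValueError branch unchanged (and
# simply never match).
from vtlengine.Operators import _handle_str_number

assert _handle_str_number(3) == 3
assert _handle_str_number("3.0") == 3      # integral float -> int
assert _handle_str_number("3.5") == 3.5
assert _handle_str_number("abc") == "abc"  # non-numeric: returned as-is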
+
+ class Binary(Operator):
+     @classmethod
+     def op_func(cls, *args: Any) -> Any:
+         x, y = args
+
+         if pd.isnull(x) or pd.isnull(y):
+             return None
+         return cls.py_op(x, y)
+
+     @classmethod
+     def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
+         if os.getenv("SPARK", False):
+             if cls.spark_op is None:
+                 cls.spark_op = cls.py_op
+
+             nulls = left_series.isnull() | right_series.isnull()
+             result = cls.spark_op(left_series, right_series)
+             result.loc[nulls] = None
+             return result
+         result = list(map(cls.op_func, left_series.values, right_series.values))
+         return pd.Series(result, index=list(range(len(result))), dtype=object)
+
+     @classmethod
+     def apply_operation_series_scalar(
+         cls,
+         series: Any,
+         scalar: Scalar,
+         series_left: bool,
+     ) -> Any:
+         if scalar is None:
+             return pd.Series(None, index=series.index)
+         if series_left:
+             return series.map(lambda x: cls.py_op(x, scalar), na_action="ignore")
+         else:
+             return series.map(lambda x: cls.py_op(scalar, x), na_action="ignore")
+
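# Sketch of the null-propagation contract: outside Spark mode (SPARK env var not
# set), the two series are combined element-wise through op_func(), which returns
# None whenever either side is null. ToyAdd is a hypothetical subclass.
import operator
import pandas as pd
import vtlengine.Operators as Operators

class ToyAdd(Operators.Binary):
    op = "+"
    py_op = operator.add

left = pd.Series([1, None, 3], dtype=object)
right = pd.Series([10, 20, None], dtype=object)
out = ToyAdd.apply_operation_two_series(left, right)
# out -> pd.Series([11, None, None], dtype=object)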
+     @classmethod
+     def validate(cls, *args: Any) -> Any:
+         """
+         Main entry point for validation: dispatches on the operand types, applies the
+         implicit type promotion (or checks it), and performs the semantic checks.
+         Returns the resulting operand structure.
+         """
+         left_operand, right_operand = args
+
+         if isinstance(left_operand, Dataset) and isinstance(right_operand, Dataset):
+             return cls.dataset_validation(left_operand, right_operand)
+         if isinstance(left_operand, Dataset) and isinstance(right_operand, Scalar):
+             return cls.dataset_scalar_validation(left_operand, right_operand)
+         if isinstance(left_operand, Scalar) and isinstance(right_operand, Dataset):
+             return cls.dataset_scalar_validation(right_operand, left_operand)
+         if isinstance(left_operand, Scalar) and isinstance(right_operand, Scalar):
+             return cls.scalar_validation(left_operand, right_operand)
+         if isinstance(left_operand, DataComponent) and isinstance(right_operand, DataComponent):
+             return cls.component_validation(left_operand, right_operand)
+         if isinstance(left_operand, DataComponent) and isinstance(right_operand, Scalar):
+             return cls.component_scalar_validation(left_operand, right_operand)
+         if isinstance(left_operand, Scalar) and isinstance(right_operand, DataComponent):
+             return cls.component_scalar_validation(right_operand, left_operand)
+         # In operator
+         if isinstance(left_operand, Dataset) and isinstance(right_operand, ScalarSet):
+             return cls.dataset_set_validation(left_operand, right_operand)
+         if isinstance(left_operand, Scalar) and isinstance(right_operand, ScalarSet):
+             return cls.scalar_set_validation(left_operand, right_operand)
+         if isinstance(left_operand, DataComponent) and isinstance(right_operand, ScalarSet):
+             return cls.component_set_validation(left_operand, right_operand)
+
+     @classmethod
+     def dataset_validation(cls, left_operand: Dataset, right_operand: Dataset) -> Dataset:
+         dataset_name = VirtualCounter._new_ds_name()
+         left_identifiers = left_operand.get_identifiers_names()
+         right_identifiers = right_operand.get_identifiers_names()
+
+         use_right_components = len(left_identifiers) < len(right_identifiers)
+
+         left_measures = sorted(left_operand.get_measures(), key=lambda x: x.name)
+         right_measures = sorted(right_operand.get_measures(), key=lambda x: x.name)
+         left_measures_names = [measure.name for measure in left_measures]
+         right_measures_names = [measure.name for measure in right_measures]
+
+         if left_measures_names != right_measures_names:
+             raise SemanticError(
+                 "1-1-14-1",
+                 op=cls.op,
+                 left=left_measures_names,
+                 right=right_measures_names,
+             )
+         elif len(left_measures) == 0:
+             raise SemanticError("1-1-1-8", op=cls.op, name=left_operand.name)
+         for left_measure, right_measure in zip(left_measures, right_measures):
+             cls.type_validation(left_measure.data_type, right_measure.data_type)
+
+         # These variables are no longer needed
+         del left_measures
+         del right_measures
+         del left_measures_names
+         del right_measures_names
+
+         join_keys = list(set(left_identifiers).intersection(right_identifiers))
+         if len(join_keys) == 0:
+             raise SemanticError("1-2-10", op=cls.op)
+
+         # Build the result components from the operand with more identifiers,
+         # keeping only Identifiers and Measures
+         base_operand = right_operand if use_right_components else left_operand
+         result_components = {
+             component_name: copy(component)
+             for component_name, component in base_operand.components.items()
+             if component.role in [Role.IDENTIFIER, Role.MEASURE]
+         }
+
+         for comp in [x for x in result_components.values() if x.role == Role.MEASURE]:
+             if comp.name in left_operand.components and comp.name in right_operand.components:
+                 left_comp = left_operand.components[comp.name]
+                 right_comp = right_operand.components[comp.name]
+                 comp.nullable = left_comp.nullable or right_comp.nullable
+
+         result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
+         cls.apply_return_type_dataset(result_dataset, left_operand, right_operand)
+         return result_dataset
+
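# Hedged end-to-end sketch of dataset_validation(): the operands must share at
# least one identifier and carry identically named measures, otherwise the
# SemanticError codes above ("1-1-14-1", "1-1-1-8", "1-2-10") fire. ToyAdd is a
# hypothetical subclass; `Integer` is assumed to come from vtlengine.DataTypes.
import operator
from vtlengine.DataTypes import Integer
from vtlengine.Model import Component, Dataset, Role
import vtlengine.Operators as Operators

class ToyAdd(Operators.Binary):
    op = "+"
    py_op = operator.add

def make_ds(name: str) -> Dataset:
    return Dataset(
        name=name,
        components={
            "Id_1": Component(name="Id_1", data_type=Integer, role=Role.IDENTIFIER, nullable=False),
            "Me_1": Component(name="Me_1", data_type=Integer, role=Role.MEASURE, nullable=True),
        },
        data=None,
    )

result = ToyAdd.dataset_validation(make_ds("DS_1"), make_ds("DS_2"))
# result is a new virtual Dataset keeping Id_1 and Me_1, with data still None;
# renaming Me_1 in one operand would raise SemanticError "1-1-14-1" instead.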
+     @classmethod
+     def dataset_scalar_validation(cls, dataset: Dataset, scalar: Scalar) -> Dataset:
+         dataset_name = VirtualCounter._new_ds_name()
+         if len(dataset.get_measures()) == 0:
+             raise SemanticError("1-1-1-8", op=cls.op, name=dataset.name)
+
+         result_components = {
+             comp_name: copy(comp)
+             for comp_name, comp in dataset.components.items()
+             if comp.role in [Role.IDENTIFIER, Role.MEASURE]
+         }
+         result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
+         cls.apply_return_type_dataset(result_dataset, dataset, scalar)
+         return result_dataset
+
+     @classmethod
+     def scalar_validation(cls, left_operand: Scalar, right_operand: Scalar) -> Scalar:
+         if not cls.validate_type_compatibility(left_operand.data_type, right_operand.data_type):
+             raise SemanticError(
+                 "1-1-1-2",
+                 type_1=left_operand.data_type,
+                 type_2=right_operand.data_type,
+                 type_check=cls.type_to_check,
+             )
+         return Scalar(
+             name="result",
+             data_type=cls.type_validation(left_operand.data_type, right_operand.data_type),
+             value=None,
+         )
+
+     @classmethod
+     def component_validation(
+         cls, left_operand: DataComponent, right_operand: DataComponent
+     ) -> DataComponent:
+         """
+         Validates the compatibility between the types of the components and the operator.
+
+         :param left_operand: The left component
+         :param right_operand: The right component
+         :return: A virtual DataComponent carrying the promoted data type
+         """
+         comp_name = VirtualCounter._new_dc_name()
+         result_data_type = cls.type_validation(left_operand.data_type, right_operand.data_type)
+         result = DataComponent(
+             name=comp_name,
+             data_type=result_data_type,
+             data=None,
+             role=left_operand.role,
+             nullable=(left_operand.nullable or right_operand.nullable),
+         )
+
+         return result
+
+     @classmethod
+     def component_scalar_validation(cls, component: DataComponent, scalar: Scalar) -> DataComponent:
+         cls.type_validation(component.data_type, scalar.data_type)
+         result = DataComponent(
+             name=component.name,
+             data_type=cls.type_validation(component.data_type, scalar.data_type),
+             data=None,
+             role=component.role,
+             nullable=component.nullable or scalar is None,
+         )
+         return result
+
+     @classmethod
+     def dataset_set_validation(cls, dataset: Dataset, scalar_set: ScalarSet) -> Dataset:
+         dataset_name = VirtualCounter._new_ds_name()
+         if len(dataset.get_measures()) == 0:
+             raise SemanticError("1-1-1-8", op=cls.op, name=dataset.name)
+         for measure in dataset.get_measures():
+             cls.type_validation(measure.data_type, scalar_set.data_type)
+         result_components = {
+             comp_name: copy(comp)
+             for comp_name, comp in dataset.components.items()
+             if comp.role in [Role.IDENTIFIER, Role.MEASURE]
+         }
+
+         result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
+         cls.apply_return_type_dataset(result_dataset, dataset, scalar_set)
+         return result_dataset
+
+     @classmethod
+     def component_set_validation(
+         cls, component: DataComponent, scalar_set: ScalarSet
+     ) -> DataComponent:
+         comp_name = VirtualCounter._new_dc_name()
+         cls.type_validation(component.data_type, scalar_set.data_type)
+         result = DataComponent(
+             name=comp_name,
+             data_type=cls.type_validation(component.data_type, scalar_set.data_type),
+             data=None,
+             role=Role.MEASURE,
+             nullable=component.nullable,
+         )
+         return result
+
+     @classmethod
+     def scalar_set_validation(cls, scalar: Scalar, scalar_set: ScalarSet) -> Scalar:
+         cls.type_validation(scalar.data_type, scalar_set.data_type)
+         return Scalar(
+             name="result",
+             data_type=cls.type_validation(scalar.data_type, scalar_set.data_type),
+             value=None,
+         )
+
+     # The following class method implements the type promotion
+     @classmethod
+     def type_validation(cls, left_type: Any, right_type: Any) -> Any:
+         """
+         Validates the compatibility between the types of the operands and the operator,
+         and returns the resulting ScalarType of the promotion
+         (implicit type promotion: binary_implicit_promotion).
+
+         :param left_type: The left operand data type
+         :param right_type: The right operand data type
+
+         :return: the resulting ScalarType, or an exception if the types are incompatible
+         """
+
+         return binary_implicit_promotion(left_type, right_type, cls.type_to_check, cls.return_type)
+
+     # The following class method checks the type promotion
+     @classmethod
+     def validate_type_compatibility(cls, left: Any, right: Any) -> bool:
+         """
+         Validates the compatibility between the types of the operands and the operator
+         (implicit type promotion: check_binary_implicit_promotion).
+
+         :param left: The left operand
+         :param right: The right operand
+
+         :return: True if the types are compatible, False otherwise
+         """
+
+         return check_binary_implicit_promotion(left, right, cls.type_to_check, cls.return_type)
+
+     @classmethod
+     def apply_return_type_dataset(
+         cls, result_dataset: Dataset, left_operand: Any, right_operand: Any
+     ) -> None:
+         """
+         Used in dataset validation. Applies the promoted result type to each Measure of
+         the result Dataset. On mono-measure Datasets, a type change replaces the Measure
+         with the default generic Measure for the result type; on multi-measure Datasets,
+         a type change is only allowed for the operators in MONOMEASURE_CHANGED_ALLOWED.
+         """
+
+         changed_allowed = cls.op in MONOMEASURE_CHANGED_ALLOWED
+         is_mono_measure = len(result_dataset.get_measures()) == 1
+         for measure in result_dataset.get_measures():
+             left_type = left_operand.get_component(measure.name).data_type
+             if isinstance(right_operand, (ScalarSet, Scalar)):
+                 right_type = right_operand.data_type
+             else:
+                 right_type = right_operand.get_component(measure.name).data_type
+
+             result_data_type = cls.type_validation(left_type, right_type)
+             if is_mono_measure and left_type.promotion_changed_type(result_data_type):
+                 component = Component(
+                     name=COMP_NAME_MAPPING[result_data_type],
+                     data_type=result_data_type,
+                     role=Role.MEASURE,
+                     nullable=measure.nullable,
+                 )
+                 result_dataset.delete_component(measure.name)
+                 result_dataset.add_component(component)
+                 if result_dataset.data is not None:
+                     result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
+             elif (
+                 changed_allowed is False
+                 and is_mono_measure is False
+                 and left_type.promotion_changed_type(result_data_type)
+             ):
+                 raise SemanticError("1-1-1-4", op=cls.op)
+             else:
+                 measure.data_type = result_data_type
+
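# Hedged sketch of the implicit promotion driving apply_return_type_dataset(),
# assuming Integer and Number are exposed by vtlengine.DataTypes. Passing None
# for type_to_check/return_type mirrors an operator without extra constraints.
from vtlengine.DataTypes import (
    Integer,
    Number,
    binary_implicit_promotion,
    check_binary_implicit_promotion,
)

check_binary_implicit_promotion(Integer, Number, None, None)       # expected: True
promoted = binary_implicit_promotion(Integer, Number, None, None)  # expected: Number

# promotion_changed_type() is what decides between keeping the Measure and
# swapping in the default generic Measure from COMP_NAME_MAPPING:
Integer.promotion_changed_type(promoted)  # expected: True  (Integer -> Number)
Number.promotion_changed_type(promoted)   # expected: False (unchanged)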
+     @classmethod
+     def dataset_evaluation(cls, left_operand: Dataset, right_operand: Dataset) -> Dataset:
+         result_dataset = cls.dataset_validation(left_operand, right_operand)
+
+         use_right_as_base = False
+         if len(left_operand.get_identifiers_names()) < len(right_operand.get_identifiers_names()):
+             use_right_as_base = True
+             base_operand_data = right_operand.data
+             other_operand_data = left_operand.data
+         else:
+             base_operand_data = left_operand.data
+             other_operand_data = right_operand.data
+
+         join_keys = list(
+             set(left_operand.get_identifiers_names()).intersection(
+                 right_operand.get_identifiers_names()
+             )
+         )
+
+         for join_key in join_keys:
+             _id_type_promotion_join_keys(
+                 left_operand.get_component(join_key),
+                 right_operand.get_component(join_key),
+                 join_key,
+                 base_operand_data,
+                 other_operand_data,
+             )
+
+         try:
+             # Merge the data
+             if base_operand_data is None or other_operand_data is None:
+                 result_data: pd.DataFrame = pd.DataFrame()
+             else:
+                 result_data = pd.merge(
+                     base_operand_data,
+                     other_operand_data,
+                     how="inner",
+                     on=join_keys,
+                     suffixes=("_x", "_y"),
+                 )
+         except ValueError as e:
+             raise Exception(f"Error merging datasets on Binary Operator: {str(e)}")
+
+         # The measure names are the same on both sides; use the left operand's names
+         for measure in left_operand.get_measures():
+             result_data[measure.name + "_x"] = cls.cast_time_types(
+                 measure.data_type, result_data[measure.name + "_x"]
+             )
+             result_data[measure.name + "_y"] = cls.cast_time_types(
+                 measure.data_type, result_data[measure.name + "_y"]
+             )
+             if use_right_as_base:
+                 result_data[measure.name] = cls.apply_operation_two_series(
+                     result_data[measure.name + "_y"], result_data[measure.name + "_x"]
+                 )
+             else:
+                 result_data[measure.name] = cls.apply_operation_two_series(
+                     result_data[measure.name + "_x"], result_data[measure.name + "_y"]
+                 )
+             result_data = result_data.drop([measure.name + "_x", measure.name + "_y"], axis=1)
+
+         # Delete attributes from the result data
+         attributes = list(
+             set(left_operand.get_attributes_names()).union(right_operand.get_attributes_names())
+         )
+         for att in attributes:
+             if att in result_data.columns:
+                 result_data = result_data.drop(att, axis=1)
+             if att + "_x" in result_data.columns:
+                 result_data = result_data.drop(att + "_x", axis=1)
+             if att + "_y" in result_data.columns:
+                 result_data = result_data.drop(att + "_y", axis=1)
+
+         result_dataset.data = result_data
+         cls.modify_measure_column(result_dataset)
+
+         return result_dataset
+
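# Plain-pandas illustration of the merge step above: the operands are inner-joined
# on their shared identifiers, the like-named measure columns pick up the
# "_x"/"_y" suffixes, and the operator is applied pairwise before the suffixed
# columns are dropped again.
import pandas as pd

left = pd.DataFrame({"Id_1": [1, 2, 3], "Me_1": [10, 20, 30]})
right = pd.DataFrame({"Id_1": [2, 3, 4], "Me_1": [5, 5, 5]})

merged = pd.merge(left, right, how="inner", on=["Id_1"], suffixes=("_x", "_y"))
merged["Me_1"] = merged["Me_1_x"] + merged["Me_1_y"]  # e.g. an addition operator
merged = merged.drop(["Me_1_x", "Me_1_y"], axis=1)
# merged -> rows for Id_1 in {2, 3}, with Me_1 = [25, 35]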
+     @classmethod
+     def scalar_evaluation(cls, left_operand: Scalar, right_operand: Scalar) -> Scalar:
+         result_scalar = cls.scalar_validation(left_operand, right_operand)
+         result_scalar.value = cls.op_func(left_operand.value, right_operand.value)
+         return result_scalar
+
+     @classmethod
+     def dataset_scalar_evaluation(
+         cls, dataset: Dataset, scalar: Scalar, dataset_left: bool = True
+     ) -> Dataset:
+         result_dataset = cls.dataset_scalar_validation(dataset, scalar)
+         result_data = dataset.data.copy() if dataset.data is not None else pd.DataFrame()
+         result_dataset.data = result_data
+
+         scalar_value = cls.cast_time_types_scalar(scalar.data_type, scalar.value)
+
+         for measure in dataset.get_measures():
+             measure_data = cls.cast_time_types(measure.data_type, result_data[measure.name].copy())
+             if (
+                 measure.data_type.__name__ == "Duration"
+                 and not isinstance(scalar_value, int)
+                 and scalar_value is not None
+             ):
+                 scalar_value = PERIOD_IND_MAPPING[scalar_value]
+             result_dataset.data[measure.name] = cls.apply_operation_series_scalar(
+                 measure_data, scalar_value, dataset_left
+             )
+
+         result_dataset.data = result_data
+         cols_to_keep = dataset.get_identifiers_names() + dataset.get_measures_names()
+         result_dataset.data = result_dataset.data[cols_to_keep]
+         cls.modify_measure_column(result_dataset)
+         return result_dataset
+
+     @classmethod
+     def component_evaluation(
+         cls, left_operand: DataComponent, right_operand: DataComponent
+     ) -> DataComponent:
+         result_component = cls.component_validation(left_operand, right_operand)
+         left_data = cls.cast_time_types(
+             left_operand.data_type,
+             left_operand.data.copy() if left_operand.data is not None else pd.Series(),
+         )
+         right_data = cls.cast_time_types(
+             right_operand.data_type,
+             (right_operand.data.copy() if right_operand.data is not None else pd.Series()),
+         )
+         result_component.data = cls.apply_operation_two_series(left_data, right_data)
+         return result_component
+
+     @classmethod
+     def component_scalar_evaluation(
+         cls, component: DataComponent, scalar: Scalar, component_left: bool = True
+     ) -> DataComponent:
+         result_component = cls.component_scalar_validation(component, scalar)
+         comp_data = cls.cast_time_types(
+             component.data_type,
+             component.data.copy() if component.data is not None else pd.Series(),
+         )
+         scalar_value = cls.cast_time_types_scalar(scalar.data_type, scalar.value)
+         if (
+             component.data_type.__name__ == "Duration"
+             and not isinstance(scalar_value, int)
+             and scalar_value is not None
+         ):
+             scalar_value = PERIOD_IND_MAPPING[scalar_value]
+         result_component.data = cls.apply_operation_series_scalar(
+             comp_data, scalar_value, component_left
+         )
+         return result_component
+
+     @classmethod
+     def dataset_set_evaluation(cls, dataset: Dataset, scalar_set: ScalarSet) -> Dataset:
+         result_dataset = cls.dataset_set_validation(dataset, scalar_set)
+         result_data = dataset.data.copy() if dataset.data is not None else pd.DataFrame()
+
+         for measure_name in dataset.get_measures_names():
+             if dataset.data is not None:
+                 result_data[measure_name] = cls.apply_operation_two_series(
+                     dataset.data[measure_name], scalar_set
+                 )
+
+         cols_to_keep = dataset.get_identifiers_names() + dataset.get_measures_names()
+         result_dataset.data = result_data[cols_to_keep]
+         cls.modify_measure_column(result_dataset)
+
+         return result_dataset
+
+     @classmethod
+     def component_set_evaluation(
+         cls, component: DataComponent, scalar_set: ScalarSet
+     ) -> DataComponent:
+         result_component = cls.component_set_validation(component, scalar_set)
+         result_component.data = cls.apply_operation_two_series(
+             component.data.copy() if component.data is not None else pd.Series(),
+             scalar_set,
+         )
+         return result_component
+
+     @classmethod
+     def scalar_set_evaluation(cls, scalar: Scalar, scalar_set: ScalarSet) -> Scalar:
+         result_scalar = cls.scalar_set_validation(scalar, scalar_set)
+         result_scalar.value = cls.op_func(scalar.value, scalar_set)
+         return result_scalar
+
+     @classmethod
+     def evaluate(cls, left_operand: Any, right_operand: Any) -> Any:
+         """
+         Evaluates the operation (based on the validation output).
+
+         :param left_operand: The left operand
+         :param right_operand: The right operand
+         :return: The result of the operation
+         """
+
+         if isinstance(left_operand, Dataset) and isinstance(right_operand, Dataset):
+             return cls.dataset_evaluation(left_operand, right_operand)
+         if isinstance(left_operand, Scalar) and isinstance(right_operand, Scalar):
+             return cls.scalar_evaluation(left_operand, right_operand)
+         if isinstance(left_operand, Dataset) and isinstance(right_operand, Scalar):
+             return cls.dataset_scalar_evaluation(left_operand, right_operand, dataset_left=True)
+         if isinstance(left_operand, Scalar) and isinstance(right_operand, Dataset):
+             return cls.dataset_scalar_evaluation(right_operand, left_operand, dataset_left=False)
+         if isinstance(left_operand, DataComponent) and isinstance(right_operand, DataComponent):
+             return cls.component_evaluation(left_operand, right_operand)
+         if isinstance(left_operand, DataComponent) and isinstance(right_operand, Scalar):
+             return cls.component_scalar_evaluation(left_operand, right_operand, component_left=True)
+         if isinstance(left_operand, Scalar) and isinstance(right_operand, DataComponent):
+             return cls.component_scalar_evaluation(
+                 right_operand, left_operand, component_left=False
+             )
+         if isinstance(left_operand, Dataset) and isinstance(right_operand, ScalarSet):
+             return cls.dataset_set_evaluation(left_operand, right_operand)
+         if isinstance(left_operand, DataComponent) and isinstance(right_operand, ScalarSet):
+             return cls.component_set_evaluation(left_operand, right_operand)
+         if isinstance(left_operand, Scalar) and isinstance(right_operand, ScalarSet):
+             return cls.scalar_set_evaluation(left_operand, right_operand)
+
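# Hedged sketch of the evaluate() dispatch on the simplest case, two Scalars.
# ToyAdd is a hypothetical subclass; `Integer` is assumed to come from
# vtlengine.DataTypes. evaluate() re-runs scalar_validation() and then applies
# op_func to the raw values, propagating nulls.
import operator
from vtlengine.DataTypes import Integer
from vtlengine.Model import Scalar
import vtlengine.Operators as Operators

class ToyAdd(Operators.Binary):
    op = "+"
    py_op = operator.add

out = ToyAdd.evaluate(
    Scalar(name="x", data_type=Integer, value=2),
    Scalar(name="y", data_type=Integer, value=3),
)
# out -> Scalar(name="result", data_type=Integer, value=5);
# a None on either side would yield value=None via op_func().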
+
+ class Unary(Operator):
+     @classmethod
+     def op_func(cls, *args: Any) -> Any:
+         x = args[0]
+
+         return None if pd.isnull(x) else cls.py_op(x)
+
+     @classmethod
+     def apply_operation_component(cls, series: Any) -> Any:
+         """
+         Applies the operation to a component
+         """
+
+         return series.map(cls.py_op, na_action="ignore")
+
+     @classmethod
+     def validate(cls, operand: Any) -> Any:
+         """
+         Main entry point for validation: dispatches on the operand type, applies the
+         implicit type promotion (or checks it), and performs the semantic checks.
+         Returns the resulting operand structure.
+         """
+
+         if isinstance(operand, Dataset):
+             return cls.dataset_validation(operand)
+         elif isinstance(operand, DataComponent):
+             return cls.component_validation(operand)
+         elif isinstance(operand, Scalar):
+             return cls.scalar_validation(operand)
+
+     @classmethod
+     def dataset_validation(cls, operand: Dataset) -> Dataset:
+         dataset_name = VirtualCounter._new_ds_name()
+         cls.validate_dataset_type(operand)
+         if len(operand.get_measures()) == 0:
+             raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
+         result_components = {
+             comp_name: copy(comp)
+             for comp_name, comp in operand.components.items()
+             if comp.role in [Role.IDENTIFIER, Role.MEASURE]
+         }
+
+         result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
+         cls.apply_return_type_dataset(result_dataset, operand)
+         return result_dataset
+
+     @classmethod
+     def scalar_validation(cls, operand: Scalar) -> Scalar:
+         result_type = cls.type_validation(operand.data_type)
+         result = Scalar(name="result", data_type=result_type, value=None)
+         return result
+
+     @classmethod
+     def component_validation(cls, operand: DataComponent) -> DataComponent:
+         comp_name = VirtualCounter._new_dc_name()
+         result_type = cls.type_validation(operand.data_type)
+         result = DataComponent(
+             name=comp_name,
+             data_type=result_type,
+             data=None,
+             role=operand.role,
+             nullable=operand.nullable,
+         )
+         return result
+
+     # The following class method implements the type promotion
+     @classmethod
+     def type_validation(cls, operand: Any) -> Any:
+         return unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
+
+     # The following class method checks the type promotion
+     @classmethod
+     def validate_type_compatibility(cls, operand: Any) -> bool:
+         return check_unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
+
+     @classmethod
+     def validate_dataset_type(cls, dataset: Dataset) -> None:
+         if cls.type_to_check is not None:
+             for measure in dataset.get_measures():
+                 if not cls.validate_type_compatibility(measure.data_type):
+                     raise SemanticError(
+                         "1-1-1-3",
+                         op=cls.op,
+                         entity=measure.role.value,
+                         name=measure.name,
+                         target_type=SCALAR_TYPES_CLASS_REVERSE[cls.type_to_check],
+                     )
+
+     @classmethod
+     def validate_scalar_type(cls, scalar: Scalar) -> None:
+         if cls.type_to_check is not None and not cls.validate_type_compatibility(scalar.data_type):
+             raise SemanticError(
+                 "1-1-1-5",
+                 op=cls.op,
+                 name=scalar.name,
+                 type=SCALAR_TYPES_CLASS_REVERSE[scalar.data_type],
+             )
+
+     @classmethod
+     def apply_return_type_dataset(cls, result_dataset: Dataset, operand: Dataset) -> None:
+         changed_allowed = cls.op in MONOMEASURE_CHANGED_ALLOWED
+         is_mono_measure = len(operand.get_measures()) == 1
+         for measure in result_dataset.get_measures():
+             operand_type = operand.get_component(measure.name).data_type
+
+             result_data_type = cls.type_validation(operand_type)
+             if is_mono_measure and operand_type.promotion_changed_type(result_data_type):
+                 component = Component(
+                     name=COMP_NAME_MAPPING[result_data_type],
+                     data_type=result_data_type,
+                     role=Role.MEASURE,
+                     nullable=measure.nullable,
+                 )
+                 result_dataset.delete_component(measure.name)
+                 result_dataset.add_component(component)
+                 if result_dataset.data is not None:
+                     result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
+             elif (
+                 changed_allowed is False
+                 and is_mono_measure is False
+                 and operand_type.promotion_changed_type(result_data_type)
+             ):
+                 raise SemanticError("1-1-1-4", op=cls.op)
+             else:
+                 measure.data_type = result_data_type
+
+     @classmethod
+     def evaluate(cls, operand: ALL_MODEL_DATA_TYPES) -> Any:
+         if isinstance(operand, Dataset):
+             return cls.dataset_evaluation(operand)
+         if isinstance(operand, Scalar):
+             return cls.scalar_evaluation(operand)
+         if isinstance(operand, DataComponent):
+             return cls.component_evaluation(operand)
+
+     @classmethod
+     def dataset_evaluation(cls, operand: Dataset) -> Dataset:
+         result_dataset = cls.dataset_validation(operand)
+         result_data = operand.data.copy() if operand.data is not None else pd.DataFrame()
+         for measure_name in operand.get_measures_names():
+             result_data[measure_name] = cls.apply_operation_component(result_data[measure_name])
+
+         cols_to_keep = operand.get_identifiers_names() + operand.get_measures_names()
+         result_data = result_data[cols_to_keep]
+
+         result_dataset.data = result_data
+         cls.modify_measure_column(result_dataset)
+         return result_dataset
+
+     @classmethod
+     def scalar_evaluation(cls, operand: Scalar) -> Scalar:
+         result_scalar = cls.scalar_validation(operand)
+         result_scalar.value = cls.op_func(operand.value)
+         return result_scalar
+
+     @classmethod
+     def component_evaluation(cls, operand: DataComponent) -> DataComponent:
+         result_component = cls.component_validation(operand)
+         result_component.data = cls.apply_operation_component(
+             operand.data.copy() if operand.data is not None else pd.Series()
+         )
+         return result_component
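# Closing sketch: a hypothetical Unary operator run end-to-end over a
# DataComponent, showing the validate-then-evaluate pattern shared by both
# operator hierarchies in this module. `Integer` is assumed to come from
# vtlengine.DataTypes.
import pandas as pd
from vtlengine.DataTypes import Integer
from vtlengine.Model import DataComponent, Role
import vtlengine.Operators as Operators

class ToyAbs(Operators.Unary):
    op = "abs"
    py_op = abs

comp = DataComponent(
    name="Me_1",
    data_type=Integer,
    data=pd.Series([-1, None, 2], dtype=object),
    role=Role.MEASURE,
    nullable=True,
)
out = ToyAbs.component_evaluation(comp)
# out is a new virtual DataComponent; out.data -> [1, None, 2], since
# apply_operation_component() maps py_op with na_action="ignore".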