vtlengine 1.4.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. vtlengine/API/_InternalApi.py +791 -0
  2. vtlengine/API/__init__.py +612 -0
  3. vtlengine/API/data/schema/external_routines_schema.json +34 -0
  4. vtlengine/API/data/schema/json_schema_2.1.json +116 -0
  5. vtlengine/API/data/schema/value_domain_schema.json +97 -0
  6. vtlengine/AST/ASTComment.py +57 -0
  7. vtlengine/AST/ASTConstructor.py +598 -0
  8. vtlengine/AST/ASTConstructorModules/Expr.py +1928 -0
  9. vtlengine/AST/ASTConstructorModules/ExprComponents.py +995 -0
  10. vtlengine/AST/ASTConstructorModules/Terminals.py +790 -0
  11. vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
  12. vtlengine/AST/ASTDataExchange.py +10 -0
  13. vtlengine/AST/ASTEncoders.py +32 -0
  14. vtlengine/AST/ASTString.py +675 -0
  15. vtlengine/AST/ASTTemplate.py +558 -0
  16. vtlengine/AST/ASTVisitor.py +25 -0
  17. vtlengine/AST/DAG/__init__.py +479 -0
  18. vtlengine/AST/DAG/_words.py +10 -0
  19. vtlengine/AST/Grammar/Vtl.g4 +705 -0
  20. vtlengine/AST/Grammar/VtlTokens.g4 +409 -0
  21. vtlengine/AST/Grammar/__init__.py +0 -0
  22. vtlengine/AST/Grammar/lexer.py +2139 -0
  23. vtlengine/AST/Grammar/parser.py +16597 -0
  24. vtlengine/AST/Grammar/tokens.py +169 -0
  25. vtlengine/AST/VtlVisitor.py +824 -0
  26. vtlengine/AST/__init__.py +674 -0
  27. vtlengine/DataTypes/TimeHandling.py +562 -0
  28. vtlengine/DataTypes/__init__.py +863 -0
  29. vtlengine/DataTypes/_time_checking.py +135 -0
  30. vtlengine/Exceptions/__exception_file_generator.py +96 -0
  31. vtlengine/Exceptions/__init__.py +159 -0
  32. vtlengine/Exceptions/messages.py +1004 -0
  33. vtlengine/Interpreter/__init__.py +2048 -0
  34. vtlengine/Model/__init__.py +501 -0
  35. vtlengine/Operators/Aggregation.py +357 -0
  36. vtlengine/Operators/Analytic.py +455 -0
  37. vtlengine/Operators/Assignment.py +23 -0
  38. vtlengine/Operators/Boolean.py +106 -0
  39. vtlengine/Operators/CastOperator.py +451 -0
  40. vtlengine/Operators/Clause.py +366 -0
  41. vtlengine/Operators/Comparison.py +488 -0
  42. vtlengine/Operators/Conditional.py +495 -0
  43. vtlengine/Operators/General.py +191 -0
  44. vtlengine/Operators/HROperators.py +254 -0
  45. vtlengine/Operators/Join.py +447 -0
  46. vtlengine/Operators/Numeric.py +422 -0
  47. vtlengine/Operators/RoleSetter.py +77 -0
  48. vtlengine/Operators/Set.py +176 -0
  49. vtlengine/Operators/String.py +578 -0
  50. vtlengine/Operators/Time.py +1144 -0
  51. vtlengine/Operators/Validation.py +275 -0
  52. vtlengine/Operators/__init__.py +900 -0
  53. vtlengine/Utils/__Virtual_Assets.py +34 -0
  54. vtlengine/Utils/__init__.py +479 -0
  55. vtlengine/__extras_check.py +17 -0
  56. vtlengine/__init__.py +27 -0
  57. vtlengine/files/__init__.py +0 -0
  58. vtlengine/files/output/__init__.py +35 -0
  59. vtlengine/files/output/_time_period_representation.py +55 -0
  60. vtlengine/files/parser/__init__.py +240 -0
  61. vtlengine/files/parser/_rfc_dialect.py +22 -0
  62. vtlengine/py.typed +0 -0
  63. vtlengine-1.4.0rc2.dist-info/METADATA +89 -0
  64. vtlengine-1.4.0rc2.dist-info/RECORD +66 -0
  65. vtlengine-1.4.0rc2.dist-info/WHEEL +4 -0
  66. vtlengine-1.4.0rc2.dist-info/licenses/LICENSE.md +661 -0
vtlengine/Operators/Comparison.py
@@ -0,0 +1,488 @@
+ import operator
+ import re
+ from copy import copy
+ from typing import Any, Optional, Union
+
+ # if os.environ.get("SPARK"):
+ #     import pyspark.pandas as pd
+ # else:
+ #     import pandas as pd
+ import pandas as pd
+
+ import vtlengine.Operators as Operator
+ from vtlengine.AST.Grammar.tokens import (
+     CHARSET_MATCH,
+     EQ,
+     GT,
+     GTE,
+     IN,
+     ISNULL,
+     LT,
+     LTE,
+     NEQ,
+     NOT_IN,
+ )
+ from vtlengine.DataTypes import COMP_NAME_MAPPING, Boolean, Null, Number, String
+ from vtlengine.Exceptions import SemanticError
+ from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
+ from vtlengine.Utils.__Virtual_Assets import VirtualCounter
+
+
+ class Unary(Operator.Unary):
+     """
+     Unary comparison operator. It returns a boolean.
+     """
+
+     return_type = Boolean
+
+
+ class IsNull(Unary):
+     """
+     Implements the isnull comparison operator. It provides class methods to
+     perform the operation on the different data structures (scalars,
+     components and datasets).
+     """
+
+     op = ISNULL
+     py_op = pd.isnull
+
+     @classmethod
+     def apply_operation_component(cls, series: Any) -> Any:
+         return series.isnull()
+
+     @classmethod
+     def op_func(cls, x: Any) -> Any:
+         return pd.isnull(x)
+
+     @classmethod
+     def dataset_validation(cls, operand: Dataset) -> Dataset:
+         result = super().dataset_validation(operand)
+         for measure in result.get_measures():
+             measure.nullable = False
+         return result
+
+     @classmethod
+     def component_validation(cls, operand: DataComponent) -> DataComponent:
+         result = super().component_validation(operand)
+         result.nullable = False
+         return result
+
+
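A minimal sketch of the semantics IsNull encodes, in plain pandas (the Dataset/DataComponent plumbing above is assumed to be driven elsewhere by the engine). Note that dataset_validation also marks every result measure as non-nullable, since isnull itself never returns null:

    import pandas as pd

    s = pd.Series([1.0, None, 3.0])
    print(s.isnull().tolist())  # [False, True, False] - the component path
    print(pd.isnull(None))      # True                 - the scalar path
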
+ class Binary(Operator.Binary):
+     """
+     Binary comparison operator. It returns a boolean.
+     """
+
+     return_type = Boolean
+
+     @classmethod
+     def _cast_values(
+         cls,
+         x: Optional[Union[int, float, str, bool]],
+         y: Optional[Union[int, float, str, bool]],
+     ) -> Any:
+         # Cast values to compatible types for comparison
+         try:
+             if isinstance(x, str) and isinstance(y, bool):
+                 y = String.cast(y)
+             elif isinstance(x, bool) and isinstance(y, str):
+                 x = String.cast(x)
+             elif isinstance(x, str) and isinstance(y, (int, float)):
+                 x = Number.cast(x)
+             elif isinstance(x, (int, float)) and isinstance(y, str):
+                 y = Number.cast(y)
+         except ValueError:
+             x = str(x)
+             y = str(y)
+
+         return x, y
+
+     @classmethod
+     def op_func(cls, x: Any, y: Any) -> Any:
+         # Return None if any of the values are NaN
+         if pd.isnull(x) or pd.isnull(y):
+             return None
+         x, y = cls._cast_values(x, y)
+         return cls.py_op(x, y)
+
+     @classmethod
+     def apply_operation_series_scalar(cls, series: Any, scalar: Any, series_left: bool) -> Any:
+         if pd.isnull(scalar):
+             return pd.Series(None, index=series.index)
+
+         first_non_null = series.dropna().iloc[0] if not series.dropna().empty else None
+         if first_non_null is not None:
+             scalar, first_non_null = cls._cast_values(scalar, first_non_null)
+
+         series_type = pd.api.types.infer_dtype(series, skipna=True)
+         first_non_null_type = pd.api.types.infer_dtype([first_non_null])
+
+         if series_type != first_non_null_type:
+             if isinstance(first_non_null, str):
+                 series = series.astype(str)
+             elif isinstance(first_non_null, (int, float)):
+                 series = series.astype(float)
+
+         op = cls.py_op if cls.py_op is not None else cls.op_func
+         if series_left:
+             result = series.map(lambda x: op(x, scalar), na_action="ignore")
+         else:
+             result = series.map(lambda x: op(scalar, x), na_action="ignore")
+
+         return result
+
+     @classmethod
+     def apply_return_type_dataset(
+         cls,
+         result_dataset: Dataset,
+         left_operand: Dataset,
+         right_operand: Union[Dataset, Scalar, ScalarSet],
+     ) -> None:
+         super().apply_return_type_dataset(result_dataset, left_operand, right_operand)
+         is_mono_measure = len(result_dataset.get_measures()) == 1
+         if is_mono_measure:
+             measure = result_dataset.get_measures()[0]
+             component = Component(
+                 name=COMP_NAME_MAPPING[Boolean],
+                 data_type=Boolean,
+                 role=Role.MEASURE,
+                 nullable=measure.nullable,
+             )
+             result_dataset.delete_component(measure.name)
+             result_dataset.add_component(component)
+             if result_dataset.data is not None:
+                 result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
+
+
+ class Equal(Binary):
+     op = EQ
+     py_op = operator.eq
+
+
+ class NotEqual(Binary):
+     op = NEQ
+     py_op = operator.ne
+
+
+ class Greater(Binary):
+     op = GT
+     py_op = operator.gt
+
+
+ class GreaterEqual(Binary):
+     op = GTE
+     py_op = operator.ge
+
+
+ class Less(Binary):
+     op = LT
+     py_op = operator.lt
+
+
+ class LessEqual(Binary):
+     op = LTE
+     py_op = operator.le
+
+
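A sketch of the null propagation and type coercion that Binary.op_func gives every operator above, shown through Equal. This assumes vtlengine is installed and that Number.cast parses numeric strings, as the ValueError fallback in _cast_values implies:

    from vtlengine.Operators.Comparison import Equal

    print(Equal.op_func(None, 3))  # None  - nulls propagate instead of False
    print(Equal.op_func("3", 3))   # True  - "3" is cast to a number first
    print(Equal.op_func(3, 2))     # False - plain operator.eq afterwards
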
+ class In(Binary):
+     op = IN
+
+     @classmethod
+     def apply_operation_two_series(cls, left_series: Any, right_series: ScalarSet) -> Any:
+         if right_series.data_type == Null:
+             return pd.Series(None, index=left_series.index)
+
+         return left_series.map(lambda x: x in right_series, na_action="ignore")
+
+     @classmethod
+     def py_op(cls, x: Any, y: Any) -> Any:
+         if y.data_type == Null:
+             return None
+         return operator.contains(y, x)
+
+
+ class NotIn(Binary):
+     op = NOT_IN
+
+     @classmethod
+     def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
+         series_result = In.apply_operation_two_series(left_series, right_series)
+         return series_result.map(lambda x: not x, na_action="ignore")
+
+     @classmethod
+     def py_op(cls, x: Any, y: Any) -> Any:
+         return not operator.contains(y, x)
+
+
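The heart of In.apply_operation_two_series is a membership map that skips nulls. A sketch with a plain Python set standing in for the ScalarSet, which only needs to support the in operator here:

    import pandas as pd

    members = {"A", "B"}
    s = pd.Series(["A", "C", None])
    print(s.map(lambda x: x in members, na_action="ignore").tolist())
    # [True, False, nan] - the null is skipped, not coerced to False
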
+ class Match(Binary):
+     op = CHARSET_MATCH
+     type_to_check = String
+
+     @classmethod
+     def op_func(cls, x: Optional[str], y: Optional[str]) -> Optional[bool]:
+         if pd.isnull(x) or pd.isnull(y):
+             return None
+         if isinstance(x, pd.Series):
+             return x.str.fullmatch(y)
+         return bool(re.fullmatch(str(y), str(x)))
+
+
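Match relies on re.fullmatch, so the pattern must cover the whole string; this is stricter than re.search. A quick illustration:

    import re

    print(bool(re.fullmatch("[A-Z]{2}", "UK")))   # True
    print(bool(re.fullmatch("[A-Z]{2}", "UKX")))  # False - no partial match
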
+ class Between(Operator.Operator):
+     """
+     This comparison operator has the following class methods.
+
+     Class methods:
+         op_func: Applies the three-way comparison (from <= operand <= to),
+             returning None when any of the three values is null.
+         apply_operation_component: Returns a pandas Series with op_func
+             applied to each component value.
+         apply_return_type_dataset: Because the result must be a boolean,
+             this function ensures the result measure is a boolean one.
+     """
+
+     return_type = Boolean
+
+     @classmethod
+     def op_func(
+         cls,
+         x: Optional[Union[int, float, bool, str]],
+         y: Optional[Union[int, float, bool, str]],
+         z: Optional[Union[int, float, bool, str]],
+     ) -> Optional[bool]:
+         return (
+             None if (pd.isnull(x) or pd.isnull(y) or pd.isnull(z)) else y <= x <= z  # type: ignore[operator]
+         )
+
+     @classmethod
+     def apply_operation_component(cls, series: Any, from_data: Any, to_data: Any) -> Any:
+         control_any_series_from_to = isinstance(from_data, pd.Series) or isinstance(
+             to_data, pd.Series
+         )
+         if control_any_series_from_to:
+             if not isinstance(from_data, pd.Series):
+                 from_data = pd.Series(from_data, index=series.index, dtype=object)
+             if not isinstance(to_data, pd.Series):
+                 to_data = pd.Series(to_data, index=series.index)
+             df = pd.DataFrame({"operand": series, "from_data": from_data, "to_data": to_data})
+             return df.apply(
+                 lambda x: cls.op_func(x["operand"], x["from_data"], x["to_data"]),
+                 axis=1,
+             )
+
+         return series.map(lambda x: cls.op_func(x, from_data, to_data))
+
+     @classmethod
+     def apply_return_type_dataset(cls, result_dataset: Dataset, operand: Dataset) -> None:
+         is_mono_measure = len(operand.get_measures()) == 1
+         for measure in result_dataset.get_measures():
+             operand_type = operand.get_component(measure.name).data_type
+             result_data_type = cls.type_validation(operand_type)
+             if is_mono_measure and operand_type.promotion_changed_type(result_data_type):
+                 component = Component(
+                     name=COMP_NAME_MAPPING[result_data_type],
+                     data_type=result_data_type,
+                     role=Role.MEASURE,
+                     nullable=measure.nullable,
+                 )
+                 result_dataset.delete_component(measure.name)
+                 result_dataset.add_component(component)
+                 if result_dataset.data is not None:
+                     result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
+             elif is_mono_measure is False and operand_type.promotion_changed_type(result_data_type):
+                 raise SemanticError("1-1-1-4", op=cls.op)
+             else:
+                 measure.data_type = result_data_type
+
+     @classmethod
+     def validate(
+         cls,
+         operand: Union[Dataset, DataComponent, Scalar],
+         from_: Union[DataComponent, Scalar],
+         to: Union[DataComponent, Scalar],
+     ) -> Any:
+         result: Union[Dataset, DataComponent, Scalar]
+         if isinstance(operand, Dataset):
+             if len(operand.get_measures()) == 0:
+                 raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
+             result_components = {
+                 comp_name: copy(comp)
+                 for comp_name, comp in operand.components.items()
+                 if comp.role == Role.IDENTIFIER or comp.role == Role.MEASURE
+             }
+             result = Dataset(name=operand.name, components=result_components, data=None)
+         elif isinstance(operand, DataComponent):
+             result = DataComponent(
+                 name=operand.name,
+                 data=None,
+                 data_type=cls.return_type,
+                 role=operand.role,
+                 nullable=operand.nullable,
+             )
+         elif isinstance(from_, Scalar) and isinstance(to, Scalar):
+             result = Scalar(name=operand.name, value=None, data_type=cls.return_type)
+         else:
+             # From or To is a DataComponent, or both
+             result = DataComponent(
+                 name=operand.name,
+                 data=None,
+                 data_type=cls.return_type,
+                 role=Role.MEASURE,
+             )
+
+         if isinstance(operand, Dataset):
+             for measure in operand.get_measures():
+                 cls.validate_type_compatibility(measure.data_type, from_.data_type)
+                 cls.validate_type_compatibility(measure.data_type, to.data_type)
+             if isinstance(result, Dataset):
+                 cls.apply_return_type_dataset(result, operand)
+         else:
+             cls.validate_type_compatibility(operand.data_type, from_.data_type)
+             cls.validate_type_compatibility(operand.data_type, to.data_type)
+
+         return result
+
+     @classmethod
+     def evaluate(
+         cls,
+         operand: Union[DataComponent, Scalar],
+         from_: Union[DataComponent, Scalar],
+         to: Union[DataComponent, Scalar],
+     ) -> Any:
+         result = cls.validate(operand, from_, to)
+         from_data = from_.data if isinstance(from_, DataComponent) else from_.value
+         to_data = to.data if isinstance(to, DataComponent) else to.value
+
+         if (
+             isinstance(from_data, pd.Series)
+             and isinstance(to_data, pd.Series)
+             and len(from_data) != len(to_data)
+         ):
+             raise ValueError("From and To must have the same length")
+
+         if isinstance(operand, Dataset):
+             result.data = operand.data.copy()
+             for measure_name in operand.get_measures_names():
+                 result.data[measure_name] = cls.apply_operation_component(
+                     operand.data[measure_name], from_data, to_data
+                 )
+                 if len(result.get_measures()) == 1:
+                     result.data[COMP_NAME_MAPPING[cls.return_type]] = result.data[measure_name]
+                     result.data = result.data.drop(columns=[measure_name])
+             result.data = result.data[result.get_components_names()]
+         if isinstance(operand, DataComponent):
+             result.data = cls.apply_operation_component(operand.data, from_data, to_data)
+         if isinstance(operand, Scalar) and isinstance(from_, Scalar) and isinstance(to, Scalar):
+             if operand.value is None or from_data is None or to_data is None:
+                 result.value = None
+             else:
+                 result.value = from_data <= operand.value <= to_data
+         elif isinstance(operand, Scalar) and (
+             isinstance(from_data, pd.Series) or isinstance(to_data, pd.Series)
+         ):  # From or To is a DataComponent, or both
+             if isinstance(from_data, pd.Series):
+                 series = pd.Series(operand.value, index=from_data.index, dtype=object)
+             elif isinstance(to_data, pd.Series):
+                 series = pd.Series(operand.value, index=to_data.index, dtype=object)
+             result_series = cls.apply_operation_component(series, from_data, to_data)
+             result = DataComponent(
+                 name=operand.name,
+                 data=result_series,
+                 data_type=cls.return_type,
+                 role=Role.MEASURE,
+             )
+         return result
+
+
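A sketch of the three-way op_func that both the scalar and the component paths of Between reduce to (assuming vtlengine is installed):

    from vtlengine.Operators.Comparison import Between

    print(Between.op_func(5, 1, 10))     # True:  1 <= 5 <= 10
    print(Between.op_func(5, None, 10))  # None:  a null bound nullifies the result
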
+ class ExistIn(Operator.Operator):
+     """
+     Class methods:
+         validate: Checks that the two datasets share identifiers and builds the
+             result dataset with a non-nullable boolean measure, bool_var.
+         evaluate: Fills bool_var with True where an identifier combination of
+             the left dataset also exists in the right one, and False otherwise.
+     """
+
+     op = IN
+
+     # noinspection PyTypeChecker
+     @classmethod
+     def validate(
+         cls, dataset_1: Dataset, dataset_2: Dataset, retain_element: Optional[Boolean]
+     ) -> Any:
+         dataset_name = VirtualCounter._new_ds_name()
+         left_identifiers = dataset_1.get_identifiers_names()
+         right_identifiers = dataset_2.get_identifiers_names()
+
+         is_subset_right = set(right_identifiers).issubset(left_identifiers)
+         is_subset_left = set(left_identifiers).issubset(right_identifiers)
+         if not (is_subset_left or is_subset_right):
+             raise ValueError("Datasets must have common identifiers")
+
+         result_components = {comp.name: copy(comp) for comp in dataset_1.get_identifiers()}
+         result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
+         result_dataset.add_component(
+             Component(name="bool_var", data_type=Boolean, role=Role.MEASURE, nullable=False)
+         )
+         return result_dataset
+
+     @classmethod
+     def evaluate(
+         cls, dataset_1: Dataset, dataset_2: Dataset, retain_element: Optional[Boolean]
+     ) -> Any:
+         result_dataset = cls.validate(dataset_1, dataset_2, retain_element)
+
+         # Checking the subset
+         left_id_names = dataset_1.get_identifiers_names()
+         right_id_names = dataset_2.get_identifiers_names()
+         is_subset_left = set(left_id_names).issubset(right_id_names)
+
+         # Identifiers for the result dataset
+         reference_identifiers_names = left_id_names
+
+         # The common identifiers are those of whichever dataset is the subset
+         common_columns = left_id_names if is_subset_left else right_id_names
+
+         # Check if the common identifiers are equal between the two datasets
+         if dataset_1.data is not None and dataset_2.data is not None:
+             true_results = pd.merge(
+                 dataset_1.data,
+                 dataset_2.data,
+                 how="inner",
+                 left_on=common_columns,
+                 right_on=common_columns,
+             )
+             true_results = true_results[reference_identifiers_names]
+         else:
+             true_results = pd.DataFrame(columns=reference_identifiers_names)
+
+         # Check for empty values
+         if true_results.empty:
+             true_results["bool_var"] = None
+         else:
+             true_results["bool_var"] = True
+         if dataset_1.data is None:
+             dataset_1.data = pd.DataFrame(columns=reference_identifiers_names)
+         final_result = pd.merge(
+             dataset_1.data,
+             true_results,
+             how="left",
+             left_on=reference_identifiers_names,
+             right_on=reference_identifiers_names,
+         )
+         final_result = final_result[reference_identifiers_names + ["bool_var"]]
+
+         # No null values are returned, only True or False
+         final_result["bool_var"] = final_result["bool_var"].fillna(False)
+
+         # Adding to the result dataset
+         result_dataset.data = final_result
+
+         # Retain only the elements that are specified (True or False)
+         if retain_element is not None:
+             result_dataset.data = result_dataset.data[
+                 result_dataset.data["bool_var"] == retain_element
+             ]
+             result_dataset.data = result_dataset.data.reset_index(drop=True)
+
+         return result_dataset
+
+     @staticmethod
+     def _check_all_columns(row: Any) -> bool:
+         return all(col_value == True for col_value in row)
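
A sketch of what ExistIn.evaluate computes, with plain DataFrames standing in for the Dataset wrappers: an inner merge finds the identifier combinations present on both sides, and a left merge plus fillna(False) flags the rest:

    import pandas as pd

    left = pd.DataFrame({"Id_1": ["A", "B", "C"]})
    right = pd.DataFrame({"Id_1": ["A", "C"]})

    hits = left.merge(right, on=["Id_1"], how="inner")
    hits["bool_var"] = True

    out = left.merge(hits, on=["Id_1"], how="left")
    out["bool_var"] = out["bool_var"].fillna(False)
    print(out)
    #   Id_1  bool_var
    # 0    A      True
    # 1    B     False
    # 2    C      True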