vtlengine 1.0.3rc2__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +55 -20
- vtlengine/API/__init__.py +11 -2
- vtlengine/API/data/schema/json_schema_2.1.json +116 -0
- vtlengine/AST/ASTConstructor.py +5 -4
- vtlengine/AST/ASTConstructorModules/Expr.py +47 -48
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +45 -23
- vtlengine/AST/ASTConstructorModules/Terminals.py +21 -11
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/DAG/__init__.py +0 -3
- vtlengine/AST/Grammar/lexer.py +0 -1
- vtlengine/AST/Grammar/parser.py +185 -440
- vtlengine/AST/VtlVisitor.py +0 -1
- vtlengine/DataTypes/TimeHandling.py +50 -15
- vtlengine/DataTypes/__init__.py +79 -7
- vtlengine/Exceptions/__init__.py +3 -5
- vtlengine/Exceptions/messages.py +65 -105
- vtlengine/Interpreter/__init__.py +83 -38
- vtlengine/Model/__init__.py +7 -9
- vtlengine/Operators/Aggregation.py +13 -7
- vtlengine/Operators/Analytic.py +48 -9
- vtlengine/Operators/Assignment.py +0 -1
- vtlengine/Operators/CastOperator.py +44 -44
- vtlengine/Operators/Clause.py +16 -10
- vtlengine/Operators/Comparison.py +20 -12
- vtlengine/Operators/Conditional.py +30 -13
- vtlengine/Operators/General.py +9 -4
- vtlengine/Operators/HROperators.py +4 -14
- vtlengine/Operators/Join.py +15 -14
- vtlengine/Operators/Numeric.py +32 -26
- vtlengine/Operators/RoleSetter.py +6 -2
- vtlengine/Operators/Set.py +12 -8
- vtlengine/Operators/String.py +9 -9
- vtlengine/Operators/Time.py +136 -116
- vtlengine/Operators/Validation.py +10 -4
- vtlengine/Operators/__init__.py +56 -69
- vtlengine/Utils/__init__.py +6 -1
- vtlengine/files/output/__init__.py +0 -1
- vtlengine/files/output/_time_period_representation.py +2 -1
- vtlengine/files/parser/__init__.py +44 -10
- vtlengine/files/parser/_rfc_dialect.py +1 -1
- vtlengine/files/parser/_time_checking.py +4 -4
- {vtlengine-1.0.3rc2.dist-info → vtlengine-1.0.4.dist-info}/METADATA +9 -7
- vtlengine-1.0.4.dist-info/RECORD +58 -0
- {vtlengine-1.0.3rc2.dist-info → vtlengine-1.0.4.dist-info}/WHEEL +1 -1
- vtlengine/DataTypes/NumericTypesHandling.py +0 -38
- vtlengine-1.0.3rc2.dist-info/RECORD +0 -58
- {vtlengine-1.0.3rc2.dist-info → vtlengine-1.0.4.dist-info}/LICENSE.md +0 -0
vtlengine/Operators/__init__.py
CHANGED
@@ -8,7 +8,20 @@ from typing import Any, Optional, Union
 # import pandas as pd
 import pandas as pd
 
-from vtlengine.AST.Grammar.tokens import
+from vtlengine.AST.Grammar.tokens import (
+    AND,
+    CEIL,
+    EQ,
+    FLOOR,
+    GT,
+    GTE,
+    LT,
+    LTE,
+    NEQ,
+    OR,
+    ROUND,
+    XOR,
+)
 from vtlengine.DataTypes import (
     COMP_NAME_MAPPING,
     SCALAR_TYPES_CLASS_REVERSE,
@@ -18,7 +31,7 @@ from vtlengine.DataTypes import (
     unary_implicit_promotion,
 )
 from vtlengine.DataTypes.TimeHandling import (
-
+    PERIOD_IND_MAPPING,
     TimeIntervalHandler,
     TimePeriodHandler,
 )
@@ -54,7 +67,6 @@ class Operator:
 
     @classmethod
     def cast_time_types(cls, data_type: Any, series: Any) -> Any:
-
         if cls.op not in BINARY_COMPARISON_OPERATORS:
             return series
         if data_type.__name__ == "TimeInterval":
@@ -64,7 +76,7 @@ class Operator:
         elif data_type.__name__ == "TimePeriod":
             series = series.map(lambda x: TimePeriodHandler(x), na_action="ignore")
         elif data_type.__name__ == "Duration":
-            series = series.map(lambda x:
+            series = series.map(lambda x: PERIOD_IND_MAPPING[x], na_action="ignore")
         return series
 
     @classmethod
@@ -76,9 +88,9 @@ class Operator:
         elif data_type.__name__ == "TimePeriod":
             return TimePeriodHandler(value)
         elif data_type.__name__ == "Duration":
-            if value not in
+            if value not in PERIOD_IND_MAPPING:
                 raise Exception(f"Duration {value} is not valid")
-            return
+            return PERIOD_IND_MAPPING[value]
         return value
 
     @classmethod
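Several hunks in this file normalize Duration values through PERIOD_IND_MAPPING before comparison operators run. The mapping itself is not visible in the diff; the sketch below only illustrates the idea of ranking period-indicator letters as integers, with an assumed indicator set and assumed values, not the library's actual constants.

# Hypothetical stand-in for PERIOD_IND_MAPPING: period-indicator letters
# ranked so that ordering comparisons between Durations are well defined.
PERIOD_IND_MAPPING = {"A": 6, "S": 5, "Q": 4, "M": 3, "W": 2, "D": 1}

def duration_lt(left: str, right: str) -> bool:
    # What cast_time_types enables: compare the mapped integers, not the letters.
    return PERIOD_IND_MAPPING[left] < PERIOD_IND_MAPPING[right]

assert duration_lt("M", "A")      # a month sorts below a year
assert not duration_lt("A", "Q")  # a year does not sort below a quarter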
@@ -165,11 +177,11 @@ class Operator:
 
 
 def _id_type_promotion_join_keys(
-
-
-
-
-
+    c_left: Component,
+    c_right: Component,
+    join_key: str,
+    left_data: Optional[pd.DataFrame] = None,
+    right_data: Optional[pd.DataFrame] = None,
 ) -> None:
     if left_data is None:
         left_data = pd.DataFrame()
@@ -184,7 +196,7 @@ def _id_type_promotion_join_keys(
         right_data[join_key] = right_data[join_key].astype(object)
         return
     if (left_type_name == "Integer" and right_type_name == "Number") or (
-
+        left_type_name == "Number" and right_type_name == "Integer"
     ):
         left_data[join_key] = left_data[join_key].map(lambda x: int(float(x)))
         right_data[join_key] = right_data[join_key].map(lambda x: int(float(x)))
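The promotion above coerces both join keys with int(float(x)) when one side is Integer and the other Number, so values like "2" and "2.0" compare equal in the join. A self-contained pandas illustration (the column names are made up for the example):

import pandas as pd

left = pd.DataFrame({"Id": ["1", "2", "3"], "Me_L": [10, 20, 30]})
right = pd.DataFrame({"Id": ["1.0", "2.0", "4.0"], "Me_R": [1, 2, 4]})

# Same coercion the diff applies to both sides of the join key:
left["Id"] = left["Id"].map(lambda x: int(float(x)))
right["Id"] = right["Id"].map(lambda x: int(float(x)))

print(left.merge(right, on="Id"))  # rows with Id 1 and 2 now match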
@@ -209,7 +221,6 @@ def _handle_str_number(x: Union[str, int, float]) -> Union[str, int, float]:
 
 
 class Binary(Operator):
-
     @classmethod
     def op_func(cls, *args: Any) -> Any:
         x, y = args
@@ -220,7 +231,6 @@ class Binary(Operator):
 
     @classmethod
     def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
-
         if os.getenv("SPARK", False):
             if cls.spark_op is None:
                 cls.spark_op = cls.py_op
@@ -234,12 +244,11 @@ class Binary(Operator):
 
     @classmethod
     def apply_operation_series_scalar(
-
-
-
-
+        cls,
+        series: Any,
+        scalar: Scalar,
+        series_left: bool,
     ) -> Any:
-
         if scalar is None:
             return pd.Series(None, index=series.index)
         if series_left:
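apply_operation_series_scalar has to respect operand order, since VTL binary operators include non-commutative ones (subtraction, division, and the comparisons imported at the top of the file). A reduced sketch of the logic, assuming py_op is the operator's plain Python function (a simplification of the class method, not its actual body):

import operator

import pandas as pd

def apply_series_scalar(py_op, series: pd.Series, scalar, series_left: bool) -> pd.Series:
    if scalar is None:  # mirrors the early return in the diff: a null column out
        return pd.Series(None, index=series.index)
    if series_left:
        return series.map(lambda x: py_op(x, scalar))
    return series.map(lambda x: py_op(scalar, x))

s = pd.Series([10, 20, 30])
print(apply_series_scalar(operator.sub, s, 5, series_left=True).tolist())   # [5, 15, 25]
print(apply_series_scalar(operator.sub, s, 5, series_left=False).tolist())  # [-5, -15, -25]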
@@ -280,7 +289,6 @@ class Binary(Operator):
 
     @classmethod
     def dataset_validation(cls, left_operand: Dataset, right_operand: Dataset) -> Dataset:
-
         left_identifiers = left_operand.get_identifiers_names()
         right_identifiers = right_operand.get_identifiers_names()
 
@@ -293,7 +301,10 @@ class Binary(Operator):
 
         if left_measures_names != right_measures_names:
             raise SemanticError(
-                "1-1-14-1",
+                "1-1-14-1",
+                op=cls.op,
+                left=left_measures_names,
+                right=right_measures_names,
             )
         elif len(left_measures) == 0:
             raise SemanticError("1-1-1-8", op=cls.op, name=left_operand.name)
@@ -331,7 +342,6 @@ class Binary(Operator):
 
     @classmethod
     def dataset_scalar_validation(cls, dataset: Dataset, scalar: Scalar) -> Dataset:
-
         if len(dataset.get_measures()) == 0:
             raise SemanticError("1-1-1-8", op=cls.op, name=dataset.name)
 
@@ -346,11 +356,12 @@ class Binary(Operator):
 
     @classmethod
     def scalar_validation(cls, left_operand: Scalar, right_operand: Scalar) -> Scalar:
-
         if not cls.validate_type_compatibility(left_operand.data_type, right_operand.data_type):
             raise SemanticError(
-                "1-1-1-2",
-
+                "1-1-1-2",
+                type_1=left_operand.data_type,
+                type_2=right_operand.data_type,
+                type_check=cls.type_to_check,
             )
         return Scalar(
             name="result",
@@ -360,7 +371,7 @@ class Binary(Operator):
 
     @classmethod
     def component_validation(
-
+        cls, left_operand: DataComponent, right_operand: DataComponent
     ) -> DataComponent:
         """
         Validates the compatibility between the types of the components and the operator
@@ -382,7 +393,6 @@ class Binary(Operator):
 
     @classmethod
     def component_scalar_validation(cls, component: DataComponent, scalar: Scalar) -> DataComponent:
-
         cls.type_validation(component.data_type, scalar.data_type)
         result = DataComponent(
             name=component.name,
@@ -395,7 +405,6 @@ class Binary(Operator):
 
     @classmethod
     def dataset_set_validation(cls, dataset: Dataset, scalar_set: ScalarSet) -> Dataset:
-
         if len(dataset.get_measures()) == 0:
             raise SemanticError("1-1-1-8", op=cls.op, name=dataset.name)
         for measure in dataset.get_measures():
@@ -412,9 +421,8 @@ class Binary(Operator):
 
     @classmethod
     def component_set_validation(
-
+        cls, component: DataComponent, scalar_set: ScalarSet
     ) -> DataComponent:
-
         cls.type_validation(component.data_type, scalar_set.data_type)
         result = DataComponent(
             name="result",
@@ -427,7 +435,6 @@ class Binary(Operator):
 
     @classmethod
     def scalar_set_validation(cls, scalar: Scalar, scalar_set: ScalarSet) -> Scalar:
-
         cls.type_validation(scalar.data_type, scalar_set.data_type)
         return Scalar(
             name="result",
@@ -468,7 +475,7 @@ class Binary(Operator):
 
     @classmethod
     def apply_return_type_dataset(
-
+        cls, result_dataset: Dataset, left_operand: Any, right_operand: Any
    ) -> None:
        """
        Used in dataset's validation.
@@ -498,9 +505,9 @@ class Binary(Operator):
             if result_dataset.data is not None:
                 result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
             elif (
-
-
-
+                changed_allowed is False
+                and is_mono_measure is False
+                and left_type.promotion_changed_type(result_data_type)
             ):
                 raise SemanticError("1-1-1-4", op=cls.op)
             else:
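The condition rebuilt above is the guard behind error 1-1-1-4: implicit type promotion may change a measure's type only on a mono-measure dataset, or when the operator is whitelisted in MONOMEASURE_CHANGED_ALLOWED. Restated as a standalone predicate (a paraphrase of the diff, not the library's API):

def measure_type_change_forbidden(changed_allowed: bool,
                                  is_mono_measure: bool,
                                  promotion_changed_type: bool) -> bool:
    # True exactly when the diff raises SemanticError("1-1-1-4", ...):
    # a multi-measure dataset whose measure type was changed by promotion,
    # under an operator that is not allowed to change it.
    return (not changed_allowed) and (not is_mono_measure) and promotion_changed_type

assert measure_type_change_forbidden(False, False, True)
assert not measure_type_change_forbidden(False, True, True)  # mono-measure is fine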
@@ -508,7 +515,6 @@ class Binary(Operator):
 
     @classmethod
     def dataset_evaluation(cls, left_operand: Dataset, right_operand: Dataset) -> Dataset:
-
         result_dataset = cls.dataset_validation(left_operand, right_operand)
 
         use_right_as_base = False
@@ -587,16 +593,14 @@ class Binary(Operator):
 
     @classmethod
     def scalar_evaluation(cls, left_operand: Scalar, right_operand: Scalar) -> Scalar:
-
         result_scalar = cls.scalar_validation(left_operand, right_operand)
         result_scalar.value = cls.op_func(left_operand.value, right_operand.value)
         return result_scalar
 
     @classmethod
     def dataset_scalar_evaluation(
-
+        cls, dataset: Dataset, scalar: Scalar, dataset_left: bool = True
     ) -> Dataset:
-
         result_dataset = cls.dataset_scalar_validation(dataset, scalar)
         result_data = dataset.data.copy() if dataset.data is not None else pd.DataFrame()
         result_dataset.data = result_data
@@ -606,9 +610,9 @@ class Binary(Operator):
         for measure in dataset.get_measures():
             measure_data = cls.cast_time_types(measure.data_type, result_data[measure.name].copy())
             if measure.data_type.__name__.__str__() == "Duration" and not isinstance(
-
+                scalar_value, int
             ):
-                scalar_value =
+                scalar_value = PERIOD_IND_MAPPING[scalar_value]
             result_dataset.data[measure.name] = cls.apply_operation_series_scalar(
                 measure_data, scalar_value, dataset_left
             )
@@ -621,9 +625,8 @@ class Binary(Operator):
 
     @classmethod
     def component_evaluation(
-
+        cls, left_operand: DataComponent, right_operand: DataComponent
     ) -> DataComponent:
-
         result_component = cls.component_validation(left_operand, right_operand)
         left_data = cls.cast_time_types(
             left_operand.data_type,
@@ -631,16 +634,15 @@ class Binary(Operator):
         )
         right_data = cls.cast_time_types(
             right_operand.data_type,
-            right_operand.data.copy() if right_operand.data is not None else pd.Series(),
+            (right_operand.data.copy() if right_operand.data is not None else pd.Series()),
         )
         result_component.data = cls.apply_operation_two_series(left_data, right_data)
         return result_component
 
     @classmethod
     def component_scalar_evaluation(
-
+        cls, component: DataComponent, scalar: Scalar, component_left: bool = True
     ) -> DataComponent:
-
         result_component = cls.component_scalar_validation(component, scalar)
         comp_data = cls.cast_time_types(
             component.data_type,
@@ -648,9 +650,9 @@ class Binary(Operator):
         )
         scalar_value = cls.cast_time_types_scalar(scalar.data_type, scalar.value)
         if component.data_type.__name__.__str__() == "Duration" and not isinstance(
-
+            scalar_value, int
         ):
-            scalar_value =
+            scalar_value = PERIOD_IND_MAPPING[scalar_value]
         result_component.data = cls.apply_operation_series_scalar(
             comp_data, scalar_value, component_left
         )
@@ -658,7 +660,6 @@ class Binary(Operator):
 
     @classmethod
     def dataset_set_evaluation(cls, dataset: Dataset, scalar_set: ScalarSet) -> Dataset:
-
         result_dataset = cls.dataset_set_validation(dataset, scalar_set)
         result_data = dataset.data.copy() if dataset.data is not None else pd.DataFrame()
 
@@ -676,18 +677,17 @@ class Binary(Operator):
 
     @classmethod
     def component_set_evaluation(
-
+        cls, component: DataComponent, scalar_set: ScalarSet
     ) -> DataComponent:
-
         result_component = cls.component_set_validation(component, scalar_set)
         result_component.data = cls.apply_operation_two_series(
-            component.data.copy() if component.data is not None else pd.Series(),
+            component.data.copy() if component.data is not None else pd.Series(),
+            scalar_set,
         )
         return result_component
 
     @classmethod
     def scalar_set_evaluation(cls, scalar: Scalar, scalar_set: ScalarSet) -> Scalar:
-
         result_scalar = cls.scalar_set_validation(scalar, scalar_set)
         result_scalar.value = cls.op_func(scalar.value, scalar_set)
         return result_scalar
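All three *_set_evaluation paths funnel into op_func(value, scalar_set), so for a membership operator like VTL's in, the work plausibly reduces to a __contains__ check on the set's values. A toy model of that shape (this ScalarSet is a hypothetical stand-in, not the class from vtlengine.Model):

from dataclasses import dataclass, field
from typing import Any, List

@dataclass
class ScalarSet:  # hypothetical stand-in for the real ScalarSet
    values: List[Any] = field(default_factory=list)

    def __contains__(self, item: Any) -> bool:
        return item in self.values

def in_op_func(x: Any, scalar_set: ScalarSet) -> bool:
    # the role op_func(scalar.value, scalar_set) plays in scalar_set_evaluation
    return x in scalar_set

print(in_op_func("BE", ScalarSet(["BE", "DE", "FR"])))  # True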
@@ -726,7 +726,6 @@ class Binary(Operator):
 
 
 class Unary(Operator):
-
     @classmethod
     def op_func(cls, *args: Any) -> Any:
         x = args[0]
@@ -758,7 +757,6 @@ class Unary(Operator):
 
     @classmethod
     def dataset_validation(cls, operand: Dataset) -> Dataset:
-
         cls.validate_dataset_type(operand)
         if len(operand.get_measures()) == 0:
             raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
@@ -774,14 +772,12 @@ class Unary(Operator):
 
     @classmethod
     def scalar_validation(cls, operand: Scalar) -> Scalar:
-
         result_type = cls.type_validation(operand.data_type)
         result = Scalar(name="result", data_type=result_type, value=None)
         return result
 
     @classmethod
     def component_validation(cls, operand: DataComponent) -> DataComponent:
-
         result_type = cls.type_validation(operand.data_type)
         result = DataComponent(
             name="result",
@@ -795,18 +791,15 @@ class Unary(Operator):
     # The following class method implements the type promotion
     @classmethod
     def type_validation(cls, operand: Any) -> Any:
-
         return unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
 
     # The following class method checks the type promotion
     @classmethod
     def validate_type_compatibility(cls, operand: Any) -> bool:
-
         return check_unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
 
     @classmethod
     def validate_dataset_type(cls, dataset: Dataset) -> None:
-
         if cls.type_to_check is not None:
             for measure in dataset.get_measures():
                 if not cls.validate_type_compatibility(measure.data_type):
@@ -820,7 +813,6 @@ class Unary(Operator):
 
     @classmethod
     def validate_scalar_type(cls, scalar: Scalar) -> None:
-
         if cls.type_to_check is not None and not cls.validate_type_compatibility(scalar.data_type):
             raise SemanticError(
                 "1-1-1-5",
@@ -831,7 +823,6 @@ class Unary(Operator):
 
     @classmethod
     def apply_return_type_dataset(cls, result_dataset: Dataset, operand: Dataset) -> None:
-
         changed_allowed = cls.op in MONOMEASURE_CHANGED_ALLOWED
         is_mono_measure = len(operand.get_measures()) == 1
         for measure in result_dataset.get_measures():
@@ -850,9 +841,9 @@ class Unary(Operator):
             if result_dataset.data is not None:
                 result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
             elif (
-
-
-
+                changed_allowed is False
+                and is_mono_measure is False
+                and operand_type.promotion_changed_type(result_data_type)
             ):
                 raise SemanticError("1-1-1-4", op=cls.op)
             else:
@@ -860,7 +851,6 @@ class Unary(Operator):
 
     @classmethod
     def evaluate(cls, operand: ALL_MODEL_DATA_TYPES) -> Any:
-
         if isinstance(operand, Dataset):
             return cls.dataset_evaluation(operand)
         if isinstance(operand, Scalar):
@@ -870,7 +860,6 @@ class Unary(Operator):
 
     @classmethod
     def dataset_evaluation(cls, operand: Dataset) -> Dataset:
-
         result_dataset = cls.dataset_validation(operand)
         result_data = operand.data.copy() if operand.data is not None else pd.DataFrame()
         for measure_name in operand.get_measures_names():
@@ -885,14 +874,12 @@ class Unary(Operator):
 
     @classmethod
     def scalar_evaluation(cls, operand: Scalar) -> Scalar:
-
         result_scalar = cls.scalar_validation(operand)
         result_scalar.value = cls.op_func(operand.value)
         return result_scalar
 
     @classmethod
     def component_evaluation(cls, operand: DataComponent) -> DataComponent:
-
         result_component = cls.component_validation(operand)
         result_component.data = cls.apply_operation_component(
             operand.data.copy() if operand.data is not None else pd.Series()
vtlengine/Utils/__init__.py
CHANGED
@@ -339,7 +339,12 @@ REGULAR_AGGREGATION_MAPPING = {
     APPLY: Apply,
 }
 
-SET_MAPPING = {
+SET_MAPPING = {
+    UNION: Union,
+    INTERSECT: Intersection,
+    SYMDIFF: Symdiff,
+    SETDIFF: Setdiff,
+}
 
 AGGREGATION_MAPPING = {
     MAX: Max,
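SET_MAPPING completes the family of token-to-class dispatch tables in this module (REGULAR_AGGREGATION_MAPPING and AGGREGATION_MAPPING around it follow the same pattern): the interpreter presumably resolves a parsed set operator through the table and delegates to the class. A miniature of the pattern with made-up tokens and classes:

# Made-up miniature of the dispatch-table pattern used throughout Utils.
class UnionOp:
    @classmethod
    def evaluate(cls, *datasets):
        return f"union({', '.join(datasets)})"

SET_MAPPING = {"union": UnionOp}  # grammar token -> operator class

op_cls = SET_MAPPING["union"]           # lookup by token
print(op_cls.evaluate("ds_1", "ds_2"))  # union(ds_1, ds_2)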
vtlengine/files/output/_time_period_representation.py
CHANGED

@@ -1,4 +1,5 @@
 from enum import Enum
+from typing import Union
 
 from vtlengine.DataTypes import TimePeriod
 from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
@@ -23,7 +24,7 @@ def _format_vtl_representation(value: str) -> str:
 
 
 def format_time_period_external_representation(
-    dataset: Dataset
+    dataset: Union[Dataset, Scalar], mode: TimePeriodRepresentation
 ) -> None:
     """
     From SDMX time period representation to standard VTL representation (no hyphen).
vtlengine/files/parser/__init__.py
CHANGED

@@ -17,10 +17,14 @@ from vtlengine.DataTypes import (
     TimeInterval,
     TimePeriod,
 )
-from vtlengine.DataTypes.TimeHandling import
+from vtlengine.DataTypes.TimeHandling import PERIOD_IND_MAPPING
 from vtlengine.Exceptions import InputValidationException, SemanticError
 from vtlengine.files.parser._rfc_dialect import register_rfc
-from vtlengine.files.parser._time_checking import
+from vtlengine.files.parser._time_checking import (
+    check_date,
+    check_time,
+    check_time_period,
+)
 from vtlengine.Model import Component, Dataset, Role
 
 TIME_CHECKS_MAPPING: Dict[Type[ScalarType], Any] = {
@@ -73,8 +77,11 @@ def _sanitize_pandas_columns(
     components: Dict[str, Component], csv_path: Union[str, Path], data: pd.DataFrame
 ) -> pd.DataFrame:
     # Fast loading from SDMX-CSV
-    if (
-
+    if (
+        "DATAFLOW" in data.columns
+        and data.columns[0] == "DATAFLOW"
+        and "DATAFLOW" not in components
+    ):
         data.drop(columns=["DATAFLOW"], inplace=True)
     if "STRUCTURE" in data.columns and data.columns[0] == "STRUCTURE":
         if "STRUCTURE" not in components:
@@ -107,7 +114,11 @@ def _pandas_load_csv(components: Dict[str, Component], csv_path: Path) -> pd.Dat
 
     try:
         data = pd.read_csv(
-            csv_path,
+            csv_path,
+            dtype=obj_dtypes,
+            engine="c",
+            keep_default_na=False,
+            na_values=[""],
         )
     except UnicodeDecodeError:
         raise InputValidationException(code="0-1-2-5", file=csv_path.name)
@@ -121,7 +132,11 @@ def _pandas_load_s3_csv(components: Dict[str, Component], csv_path: str) -> pd.D
     # start = time()
     try:
         data = pd.read_csv(
-            csv_path,
+            csv_path,
+            dtype=obj_dtypes,
+            engine="c",
+            keep_default_na=False,
+            na_values=[""],
         )
 
     except UnicodeDecodeError:
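The new read_csv arguments change how missing values are detected: keep_default_na=False disables pandas' long default list of NA strings ("NA", "null", "NaN", and so on), and na_values=[""] then re-enables only the empty field as null, so literal strings like "NA" survive as data; forcing object dtypes keeps identifiers from being silently converted to numbers. A runnable demonstration of the difference:

import io

import pandas as pd

csv = "Id,Me_1\nNA,1\n,2\n"

default = pd.read_csv(io.StringIO(csv))
strict = pd.read_csv(
    io.StringIO(csv),
    dtype={"Id": object},
    keep_default_na=False,
    na_values=[""],
)

print(default["Id"].isna().tolist())  # [True, True]: "NA" and "" both become null
print(strict["Id"].isna().tolist())   # [False, True]: only the empty field is null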
@@ -165,7 +180,6 @@ def _validate_pandas(
     comp_name = ""
     comp = None
     try:
-
         for comp_name, comp in components.items():
             if comp.data_type in (Date, TimePeriod, TimeInterval):
                 data[comp_name] = data[comp_name].map(
@@ -184,16 +198,34 @@ def _validate_pandas(
             elif comp.data_type == Duration:
                 values_correct = (
                     data[comp_name]
-                    .map(
+                    .map(
+                        lambda x: Duration.validate_duration(x),
+                        na_action="ignore",
+                    )
                     .all()
                 )
                 if not values_correct:
-
+                    try:
+                        values_correct = (
+                            data[comp_name]
+                            .map(
+                                lambda x: x.replace(" ", "") in PERIOD_IND_MAPPING,
+                                na_action="ignore",
+                            )
+                            .all()
+                        )
+                        if not values_correct:
+                            raise ValueError(
+                                f"Duration values are not correct in column {comp_name}"
+                            )
+                    except ValueError:
+                        raise ValueError(f"Duration values are not correct in column {comp_name}")
             else:
                 data[comp_name] = data[comp_name].map(
                     lambda x: str(x).replace('"', ""), na_action="ignore"
                 )
                 data[comp_name] = data[comp_name].astype(np.object_, errors="raise")
+
     except ValueError:
         str_comp = SCALAR_TYPES_CLASS_REVERSE[comp.data_type] if comp else "Null"
         raise SemanticError("0-1-1-12", name=dataset_name, column=comp_name, type=str_comp)
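The block above validates Duration columns in two passes: first Duration.validate_duration on each value, then, only if that fails, a new fallback that strips spaces and accepts bare period-indicator letters via PERIOD_IND_MAPPING. The same shape as a standalone function; note that validate_duration's exact grammar is not visible in this diff, so the regex and indicator set below are illustrative assumptions:

import re

# Assumed indicator set; the diff only shows membership in PERIOD_IND_MAPPING.
PERIOD_INDICATORS = {"A", "S", "Q", "M", "W", "D"}
# Illustrative, simplified ISO-8601-style duration pattern.
ISO_DURATION = re.compile(r"^P(?:\d+Y)?(?:\d+M)?(?:\d+D)?$")

def is_valid_duration(value: str) -> bool:
    # Pass 1: a full duration literal. Pass 2 (the new fallback): a bare
    # period-indicator letter, with stray spaces stripped first.
    if value != "P" and ISO_DURATION.match(value):
        return True
    return value.replace(" ", "") in PERIOD_INDICATORS

print(is_valid_duration("P1Y2M"), is_valid_duration(" A "), is_valid_duration("x"))
# True True False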
@@ -202,7 +234,9 @@ def _validate_pandas(
 
 
 def load_datapoints(
-    components: Dict[str, Component],
+    components: Dict[str, Component],
+    dataset_name: str,
+    csv_path: Optional[Union[Path, str]] = None,
 ) -> pd.DataFrame:
     if csv_path is None or (isinstance(csv_path, Path) and not csv_path.exists()):
         return pd.DataFrame(columns=list(components.keys()))
vtlengine/files/parser/_time_checking.py
CHANGED

@@ -21,16 +21,16 @@ def check_date(value: str) -> str:
             raise InputValidationException(f"Date {value} is out of range for the month.")
         if "month must be in 1..12" in str(e):
             raise InputValidationException(
-                f"Date {value} is invalid.
+                f"Date {value} is invalid. Month must be between 1 and 12."
             )
         raise InputValidationException(
-            f"Date {value} is not in the correct format.
+            f"Date {value} is not in the correct format. Use YYYY-MM-DD."
         )
 
     # Check date is between 1900 and 9999
     if not 1800 <= date_value.year <= 9999:
         raise InputValidationException(
-            f"Date {value} is invalid.
+            f"Date {value} is invalid. Year must be between 1900 and 9999."
         )
 
     return date_value.isoformat()
@@ -68,7 +68,7 @@ def check_time(value: str) -> str:
             raise ValueError("Start date is greater than end date.")
         return value
     raise ValueError(
-        "Time is not in the correct format.
+        "Time is not in the correct format. Use YYYY-MM-DD/YYYY-MM-DD or YYYY or YYYY-MM."
     )
 
 
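The messages completed above pair with the ValueError texts CPython's datetime raises: "month must be in 1..12" and "day is out of range for month" come from date construction. A simplified, runnable approximation of check_date's flow (not its actual body; note the source checks year >= 1800 while the message says 1900, a mismatch preserved in the diff):

from datetime import date

def check_date(value: str) -> str:
    # Simplified approximation of _time_checking.check_date.
    try:
        date_value = date.fromisoformat(value)
    except ValueError as e:
        if "day is out of range for month" in str(e):
            raise ValueError(f"Date {value} is out of range for the month.")
        if "month must be in 1..12" in str(e):
            raise ValueError(f"Date {value} is invalid. Month must be between 1 and 12.")
        raise ValueError(f"Date {value} is not in the correct format. Use YYYY-MM-DD.")
    if not 1800 <= date_value.year <= 9999:  # bound and message disagree in the source
        raise ValueError(f"Date {value} is invalid. Year must be between 1900 and 9999.")
    return date_value.isoformat()

print(check_date("2024-02-29"))  # 2024-02-29 (2024 is a leap year)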
{vtlengine-1.0.3rc2.dist-info → vtlengine-1.0.4.dist-info}/METADATA
CHANGED

@@ -1,34 +1,36 @@
-Metadata-Version: 2.
+Metadata-Version: 2.3
 Name: vtlengine
-Version: 1.0.
+Version: 1.0.4
 Summary: Run and Validate VTL Scripts
 License: AGPL-3.0
 Keywords: vtl,sdmx,vtlengine,Validation and Transformation Language
 Author: MeaningfulData
 Author-email: info@meaningfuldata.eu
-Requires-Python: >=3.
+Requires-Python: >=3.9,<4.0
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Information Technology
 Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: GNU Affero General Public License v3
 Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Typing :: Typed
 Requires-Dist: antlr4-python3-runtime (==4.9.2)
-Requires-Dist:
-Requires-Dist:
+Requires-Dist: duckdb (>=1.1,<2.0)
+Requires-Dist: jsonschema (>=4.23.0,<5.0.0)
 Requires-Dist: networkx (>=2.8.8,<3.0.0)
 Requires-Dist: numexpr (>=2.9.0,<3.0.0)
 Requires-Dist: pandas (>=2.1.4,<3.0.0)
-Requires-Dist: s3fs (>=
+Requires-Dist: s3fs (>=2025.2.0,<2026.0.0)
 Requires-Dist: sqlglot (>=22.2.0,<23.0.0)
 Project-URL: Authors, https://github.com/Meaningful-Data/vtlengine/graphs/contributors
-Project-URL: BugTracker, https://github.com/Meaningful-Data/vtlengine/issues
 Project-URL: Documentation, https://docs.vtlengine.meaningfuldata.eu
+Project-URL: IssueTracker, https://github.com/Meaningful-Data/vtlengine/issues
+Project-URL: MeaningfulData, https://www.meaningfuldata.eu/
 Project-URL: Repository, https://github.com/Meaningful-Data/vtlengine
 Description-Content-Type: text/markdown