vtlengine 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Potentially problematic release: this version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +19 -8
- vtlengine/API/__init__.py +9 -9
- vtlengine/AST/ASTConstructor.py +23 -43
- vtlengine/AST/ASTConstructorModules/Expr.py +147 -71
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +104 -40
- vtlengine/AST/ASTConstructorModules/Terminals.py +28 -39
- vtlengine/AST/ASTTemplate.py +16 -1
- vtlengine/AST/DAG/__init__.py +12 -15
- vtlengine/AST/Grammar/Vtl.g4 +49 -20
- vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
- vtlengine/AST/Grammar/lexer.py +1293 -1183
- vtlengine/AST/Grammar/parser.py +5758 -3939
- vtlengine/AST/Grammar/tokens.py +12 -0
- vtlengine/AST/VtlVisitor.py +9 -2
- vtlengine/AST/__init__.py +21 -3
- vtlengine/DataTypes/TimeHandling.py +12 -7
- vtlengine/DataTypes/__init__.py +17 -24
- vtlengine/Exceptions/__init__.py +43 -1
- vtlengine/Exceptions/messages.py +82 -62
- vtlengine/Interpreter/__init__.py +125 -120
- vtlengine/Model/__init__.py +17 -12
- vtlengine/Operators/Aggregation.py +14 -14
- vtlengine/Operators/Analytic.py +56 -31
- vtlengine/Operators/Assignment.py +2 -3
- vtlengine/Operators/Boolean.py +5 -7
- vtlengine/Operators/CastOperator.py +12 -13
- vtlengine/Operators/Clause.py +11 -13
- vtlengine/Operators/Comparison.py +31 -17
- vtlengine/Operators/Conditional.py +157 -17
- vtlengine/Operators/General.py +4 -4
- vtlengine/Operators/HROperators.py +41 -34
- vtlengine/Operators/Join.py +18 -22
- vtlengine/Operators/Numeric.py +76 -39
- vtlengine/Operators/RoleSetter.py +6 -8
- vtlengine/Operators/Set.py +7 -12
- vtlengine/Operators/String.py +19 -27
- vtlengine/Operators/Time.py +366 -43
- vtlengine/Operators/Validation.py +4 -7
- vtlengine/Operators/__init__.py +38 -41
- vtlengine/Utils/__init__.py +149 -94
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +2 -2
- vtlengine/files/output/_time_period_representation.py +0 -1
- vtlengine/files/parser/__init__.py +18 -18
- vtlengine/files/parser/_time_checking.py +3 -2
- {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/METADATA +17 -5
- vtlengine-1.0.3.dist-info/RECORD +58 -0
- vtlengine-1.0.1.dist-info/RECORD +0 -58
- {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/LICENSE.md +0 -0
- {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/WHEEL +0 -0
vtlengine/Operators/__init__.py
CHANGED
@@ -1,31 +1,29 @@
 import os
 from copy import copy
-from typing import Any,
+from typing import Any, Optional, Union
 
+# if os.environ.get("SPARK", False):
+#     import pyspark.pandas as pd
+# else:
+#     import pandas as pd
+import pandas as pd
+
+from vtlengine.AST.Grammar.tokens import AND, CEIL, EQ, FLOOR, GT, GTE, LT, LTE, NEQ, OR, ROUND, XOR
 from vtlengine.DataTypes import (
     COMP_NAME_MAPPING,
+    SCALAR_TYPES_CLASS_REVERSE,
     binary_implicit_promotion,
     check_binary_implicit_promotion,
     check_unary_implicit_promotion,
     unary_implicit_promotion,
-    SCALAR_TYPES_CLASS_REVERSE,
 )
 from vtlengine.DataTypes.TimeHandling import (
+    DURATION_MAPPING,
     TimeIntervalHandler,
     TimePeriodHandler,
-    DURATION_MAPPING,
 )
-
-from vtlengine.AST.Grammar.tokens import CEIL, FLOOR, ROUND, EQ, NEQ, GT, GTE, LT, LTE, XOR, OR, AND
 from vtlengine.Exceptions import SemanticError
-
-# if os.environ.get("SPARK", False):
-#     import pyspark.pandas as pd
-# else:
-#     import pandas as pd
-import pandas as pd
-
-from vtlengine.Model import Component, Dataset, Role, Scalar, DataComponent, ScalarSet
+from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar, ScalarSet
 
 ALL_MODEL_DATA_TYPES = Union[Dataset, Scalar, DataComponent]
 
@@ -167,13 +165,12 @@ class Operator:
 
 
 def _id_type_promotion_join_keys(
-
-
-
-
-
+    c_left: Component,
+    c_right: Component,
+    join_key: str,
+    left_data: Optional[pd.DataFrame] = None,
+    right_data: Optional[pd.DataFrame] = None,
 ) -> None:
-
     if left_data is None:
         left_data = pd.DataFrame()
     if right_data is None:
@@ -187,7 +184,7 @@ def _id_type_promotion_join_keys(
         right_data[join_key] = right_data[join_key].astype(object)
         return
     if (left_type_name == "Integer" and right_type_name == "Number") or (
-
+        left_type_name == "Number" and right_type_name == "Integer"
     ):
         left_data[join_key] = left_data[join_key].map(lambda x: int(float(x)))
         right_data[join_key] = right_data[join_key].map(lambda x: int(float(x)))
@@ -200,7 +197,6 @@ def _id_type_promotion_join_keys(
 
 
 def _handle_str_number(x: Union[str, int, float]) -> Union[str, int, float]:
-
     if isinstance(x, int):
         return x
     try:
@@ -238,10 +234,10 @@ class Binary(Operator):
 
     @classmethod
     def apply_operation_series_scalar(
-
-
-
-
+        cls,
+        series: Any,
+        scalar: Scalar,
+        series_left: bool,
     ) -> Any:
 
         if scalar is None:
@@ -353,7 +349,8 @@ class Binary(Operator):
 
         if not cls.validate_type_compatibility(left_operand.data_type, right_operand.data_type):
             raise SemanticError(
-                "1-1-1-2",
+                "1-1-1-2", type_1=left_operand.data_type, type_2=right_operand.data_type,
+                type_check=cls.type_to_check
             )
         return Scalar(
             name="result",
@@ -363,7 +360,7 @@ class Binary(Operator):
 
     @classmethod
     def component_validation(
-
+        cls, left_operand: DataComponent, right_operand: DataComponent
     ) -> DataComponent:
         """
         Validates the compatibility between the types of the components and the operator
@@ -415,7 +412,7 @@ class Binary(Operator):
 
     @classmethod
     def component_set_validation(
-
+        cls, component: DataComponent, scalar_set: ScalarSet
     ) -> DataComponent:
 
         cls.type_validation(component.data_type, scalar_set.data_type)
@@ -471,7 +468,7 @@ class Binary(Operator):
 
     @classmethod
     def apply_return_type_dataset(
-
+        cls, result_dataset: Dataset, left_operand: Any, right_operand: Any
     ) -> None:
         """
         Used in dataset's validation.
@@ -501,9 +498,9 @@ class Binary(Operator):
             if result_dataset.data is not None:
                 result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
         elif (
-
-
-
+            changed_allowed is False
+            and is_mono_measure is False
+            and left_type.promotion_changed_type(result_data_type)
         ):
             raise SemanticError("1-1-1-4", op=cls.op)
         else:
@@ -597,7 +594,7 @@ class Binary(Operator):
 
     @classmethod
     def dataset_scalar_evaluation(
-
+        cls, dataset: Dataset, scalar: Scalar, dataset_left: bool = True
     ) -> Dataset:
 
         result_dataset = cls.dataset_scalar_validation(dataset, scalar)
@@ -609,7 +606,7 @@ class Binary(Operator):
         for measure in dataset.get_measures():
             measure_data = cls.cast_time_types(measure.data_type, result_data[measure.name].copy())
             if measure.data_type.__name__.__str__() == "Duration" and not isinstance(
-
+                scalar_value, int
             ):
                 scalar_value = DURATION_MAPPING[scalar_value]
             result_dataset.data[measure.name] = cls.apply_operation_series_scalar(
@@ -624,7 +621,7 @@ class Binary(Operator):
 
     @classmethod
     def component_evaluation(
-
+        cls, left_operand: DataComponent, right_operand: DataComponent
     ) -> DataComponent:
 
         result_component = cls.component_validation(left_operand, right_operand)
@@ -641,7 +638,7 @@ class Binary(Operator):
 
     @classmethod
     def component_scalar_evaluation(
-
+        cls, component: DataComponent, scalar: Scalar, component_left: bool = True
     ) -> DataComponent:
 
         result_component = cls.component_scalar_validation(component, scalar)
@@ -651,7 +648,7 @@ class Binary(Operator):
         )
         scalar_value = cls.cast_time_types_scalar(scalar.data_type, scalar.value)
         if component.data_type.__name__.__str__() == "Duration" and not isinstance(
-
+            scalar_value, int
         ):
             scalar_value = DURATION_MAPPING[scalar_value]
         result_component.data = cls.apply_operation_series_scalar(
@@ -679,7 +676,7 @@ class Binary(Operator):
 
     @classmethod
     def component_set_evaluation(
-
+        cls, component: DataComponent, scalar_set: ScalarSet
     ) -> DataComponent:
 
         result_component = cls.component_set_validation(component, scalar_set)
@@ -853,9 +850,9 @@ class Unary(Operator):
             if result_dataset.data is not None:
                 result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
         elif (
-
-
-
+            changed_allowed is False
+            and is_mono_measure is False
+            and operand_type.promotion_changed_type(result_data_type)
         ):
             raise SemanticError("1-1-1-4", op=cls.op)
         else:
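The `_id_type_promotion_join_keys` change above promotes mismatched Integer/Number join keys by coercing both sides to `int` before the merge. A minimal standalone sketch of that idea, using plain pandas and made-up column names (not the library's real API):

# Sketch of the Integer/Number join-key promotion shown in the diff above
# (hypothetical data and column names, plain pandas).
import pandas as pd

left = pd.DataFrame({"Id_1": ["1", "2", "3"], "Me_1": [10, 20, 30]})
right = pd.DataFrame({"Id_1": [1.0, 2.0, 4.0], "Me_2": ["a", "b", "c"]})

# Same coercion as the diff: route through float() so "2", "2.0" and 2.0
# all end up as the same int key before merging.
for df in (left, right):
    df["Id_1"] = df["Id_1"].map(lambda x: int(float(x)))

print(left.merge(right, on="Id_1", how="inner"))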
vtlengine/Utils/__init__.py
CHANGED
@@ -1,3 +1,101 @@
+from typing import Any, Dict
+
+from vtlengine.AST.Grammar.tokens import (
+    ABS,
+    AGGREGATE,
+    AND,
+    APPLY,
+    AS,
+    ATTRIBUTE,
+    AVG,
+    CALC,
+    CEIL,
+    CHARSET_MATCH,
+    CONCAT,
+    COUNT,
+    CROSS_JOIN,
+    DATE_ADD,
+    DATEDIFF,
+    DAYOFMONTH,
+    DAYOFYEAR,
+    DAYTOMONTH,
+    DAYTOYEAR,
+    DIV,
+    DROP,
+    EQ,
+    EXP,
+    FILL_TIME_SERIES,
+    FILTER,
+    FIRST_VALUE,
+    FLOOR,
+    FLOW_TO_STOCK,
+    FULL_JOIN,
+    GT,
+    GTE,
+    IDENTIFIER,
+    IN,
+    INNER_JOIN,
+    INTERSECT,
+    ISNULL,
+    KEEP,
+    LAG,
+    LAST_VALUE,
+    LCASE,
+    LEAD,
+    LEFT_JOIN,
+    LEN,
+    LN,
+    LOG,
+    LT,
+    LTE,
+    LTRIM,
+    MAX,
+    MEASURE,
+    MEDIAN,
+    MEMBERSHIP,
+    MIN,
+    MINUS,
+    MOD,
+    MONTH,
+    MONTHTODAY,
+    MULT,
+    NEQ,
+    NOT,
+    NOT_IN,
+    NVL,
+    OR,
+    PERIOD_INDICATOR,
+    PIVOT,
+    PLUS,
+    POWER,
+    RANDOM,
+    RANK,
+    RATIO_TO_REPORT,
+    RENAME,
+    REPLACE,
+    ROUND,
+    RTRIM,
+    SETDIFF,
+    SQRT,
+    STDDEV_POP,
+    STDDEV_SAMP,
+    STOCK_TO_FLOW,
+    SUBSPACE,
+    SUBSTR,
+    SUM,
+    SYMDIFF,
+    TIMESHIFT,
+    TRIM,
+    TRUNC,
+    UCASE,
+    UNION,
+    UNPIVOT,
+    VAR_POP,
+    VAR_SAMP,
+    XOR,
+    YEAR,
+    YEARTODAY,
+)
 from vtlengine.Operators.Aggregation import (
     Avg,
     Count,
@@ -12,20 +110,40 @@ from vtlengine.Operators.Aggregation import (
 )
 from vtlengine.Operators.Analytic import (
     Avg as AvgAnalytic,
+)
+from vtlengine.Operators.Analytic import (
     Count as CountAnalytic,
+)
+from vtlengine.Operators.Analytic import (
     FirstValue,
     Lag,
     LastValue,
     Lead,
+    Rank,
+    RatioToReport,
+)
+from vtlengine.Operators.Analytic import (
     Max as MaxAnalytic,
+)
+from vtlengine.Operators.Analytic import (
     Median as MedianAnalytic,
+)
+from vtlengine.Operators.Analytic import (
     Min as MinAnalytic,
+)
+from vtlengine.Operators.Analytic import (
     PopulationStandardDeviation as PopulationStandardDeviationAnalytic,
+)
+from vtlengine.Operators.Analytic import (
     PopulationVariance as PopulationVarianceAnalytic,
-
-
+)
+from vtlengine.Operators.Analytic import (
     SampleStandardDeviation as SampleStandardDeviationAnalytic,
+)
+from vtlengine.Operators.Analytic import (
     SampleVariance as SampleVarianceAnalytic,
+)
+from vtlengine.Operators.Analytic import (
     Sum as SumAnalytic,
 )
 from vtlengine.Operators.Boolean import And, Not, Or, Xor
@@ -48,22 +166,22 @@ from vtlengine.Operators.Comparison import (
     IsNull,
     Less,
     LessEqual,
+    Match,
     NotEqual,
     NotIn,
-    Match,
 )
 from vtlengine.Operators.Conditional import Nvl
 from vtlengine.Operators.General import Alias, Membership
 from vtlengine.Operators.HROperators import (
+    HRBinMinus,
+    HRBinPlus,
     HREqual,
     HRGreater,
     HRGreaterEqual,
     HRLess,
     HRLessEqual,
-    HRBinPlus,
-    HRBinMinus,
-    HRUnPlus,
     HRUnMinus,
+    HRUnPlus,
 )
 from vtlengine.Operators.Join import Apply, CrossJoin, FullJoin, InnerJoin, LeftJoin
 from vtlengine.Operators.Numeric import (
@@ -79,6 +197,7 @@ from vtlengine.Operators.Numeric import (
     Mult,
     NaturalLogarithm,
     Power,
+    Random,
     Round,
     SquareRoot,
     Trunc,
@@ -99,100 +218,24 @@ from vtlengine.Operators.String import (
     Upper,
 )
 from vtlengine.Operators.Time import (
+    Date_Add,
+    Date_Diff,
+    Day_of_Month,
+    Day_of_Year,
+    Day_to_Month,
+    Day_to_Year,
+    Fill_time_series,
     Flow_to_stock,
+    Month,
+    Month_to_Day,
     Period_indicator,
     Stock_to_flow,
-    Fill_time_series,
     Time_Shift,
+    Year,
+    Year_to_Day,
 )
 
-
-    MEMBERSHIP,
-    AND,
-    OR,
-    XOR,
-    EQ,
-    NEQ,
-    GT,
-    GTE,
-    LT,
-    LTE,
-    IN,
-    NOT_IN,
-    NVL,
-    PLUS,
-    MINUS,
-    MULT,
-    LOG,
-    MOD,
-    POWER,
-    DIV,
-    AS,
-    CONCAT,
-    TIMESHIFT,
-    CHARSET_MATCH,
-    NOT,
-    ABS,
-    EXP,
-    LN,
-    SQRT,
-    CEIL,
-    FLOOR,
-    ISNULL,
-    PERIOD_INDICATOR,
-    LEN,
-    LCASE,
-    LTRIM,
-    RTRIM,
-    TRIM,
-    UCASE,
-    FLOW_TO_STOCK,
-    STOCK_TO_FLOW,
-    ROUND,
-    TRUNC,
-    SUBSTR,
-    REPLACE,
-    FILL_TIME_SERIES,
-    IDENTIFIER,
-    ATTRIBUTE,
-    MEASURE,
-    CALC,
-    FILTER,
-    KEEP,
-    DROP,
-    RENAME,
-    PIVOT,
-    UNPIVOT,
-    SUBSPACE,
-    AGGREGATE,
-    APPLY,
-    UNION,
-    INTERSECT,
-    SYMDIFF,
-    SETDIFF,
-    MAX,
-    MIN,
-    SUM,
-    COUNT,
-    AVG,
-    MEDIAN,
-    STDDEV_POP,
-    STDDEV_SAMP,
-    VAR_POP,
-    VAR_SAMP,
-    LAG,
-    LEAD,
-    FIRST_VALUE,
-    LAST_VALUE,
-    RATIO_TO_REPORT,
-    RANK,
-    INNER_JOIN,
-    LEFT_JOIN,
-    FULL_JOIN,
-    CROSS_JOIN,
-)
-
-BINARY_MAPPING = {
+BINARY_MAPPING: Dict[Any, Any] = {
     # General
     MEMBERSHIP: Membership,
     # Boolean
@@ -218,6 +261,7 @@ BINARY_MAPPING = {
     MOD: Modulo,
     POWER: Power,
     DIV: Div,
+    RANDOM: Random,
     # General
     AS: Alias,
     # String
@@ -225,6 +269,7 @@ BINARY_MAPPING = {
     # Time
     TIMESHIFT: Time_Shift,
     CHARSET_MATCH: Match,
+    DATEDIFF: Date_Diff,
 }
 
 UNARY_MAPPING = {
@@ -253,6 +298,14 @@ UNARY_MAPPING = {
     PERIOD_INDICATOR: Period_indicator,
     FLOW_TO_STOCK: Flow_to_stock,
     STOCK_TO_FLOW: Stock_to_flow,
+    YEAR: Year,
+    MONTH: Month,
+    DAYOFMONTH: Day_of_Month,
+    DAYOFYEAR: Day_of_Year,
+    DAYTOYEAR: Day_to_Year,
+    DAYTOMONTH: Day_to_Month,
+    YEARTODAY: Year_to_Day,
+    MONTHTODAY: Month_to_Day,
 }
 
 PARAMETRIC_MAPPING = {
@@ -264,6 +317,7 @@ PARAMETRIC_MAPPING = {
     REPLACE: Replace,
     # Time
     FILL_TIME_SERIES: Fill_time_series,
+    DATE_ADD: Date_Add,
 }
 
 ROLE_SETTER_MAPPING = {
@@ -320,6 +374,7 @@ ANALYTIC_MAPPING = {
 }
 
 THEN_ELSE = {"then": "T", "else": "E"}
+
 JOIN_MAPPING = {
     INNER_JOIN: InnerJoin,
     LEFT_JOIN: LeftJoin,
vtlengine/__init__.py
CHANGED
@@ -3,11 +3,11 @@ from typing import Optional, Union
 
 import pandas as pd
 
-from vtlengine.Model import Dataset
 from vtlengine.files.output._time_period_representation import (
-    format_time_period_external_representation,
     TimePeriodRepresentation,
+    format_time_period_external_representation,
 )
+from vtlengine.Model import Dataset
 
 
 def save_datapoints(
vtlengine/files/parser/__init__.py
CHANGED
@@ -1,28 +1,27 @@
 import warnings
 from csv import DictReader
 from pathlib import Path
-
-from typing import Optional, Dict, Union, Any, Type, List
+from typing import Any, Dict, List, Optional, Type, Union
 
 import numpy as np
 import pandas as pd
+
 from vtlengine.DataTypes import (
+    SCALAR_TYPES_CLASS_REVERSE,
+    Boolean,
     Date,
-
-    TimeInterval,
+    Duration,
     Integer,
     Number,
-    Boolean,
-    Duration,
-    SCALAR_TYPES_CLASS_REVERSE,
     ScalarType,
+    TimeInterval,
+    TimePeriod,
 )
 from vtlengine.DataTypes.TimeHandling import DURATION_MAPPING
-from vtlengine.files.parser._rfc_dialect import register_rfc
-from vtlengine.files.parser._time_checking import check_date, check_time_period, check_time
-
 from vtlengine.Exceptions import InputValidationException, SemanticError
-from vtlengine.
+from vtlengine.files.parser._rfc_dialect import register_rfc
+from vtlengine.files.parser._time_checking import check_date, check_time, check_time_period
+from vtlengine.Model import Component, Dataset, Role
 
 TIME_CHECKS_MAPPING: Dict[Type[ScalarType], Any] = {
     Date: check_date,
@@ -74,9 +73,9 @@ def _sanitize_pandas_columns(
     components: Dict[str, Component], csv_path: Union[str, Path], data: pd.DataFrame
 ) -> pd.DataFrame:
     # Fast loading from SDMX-CSV
-    if "DATAFLOW" in data.columns and data.columns[0] == "DATAFLOW"
-
-
+    if ("DATAFLOW" in data.columns and data.columns[0] == "DATAFLOW" and
+            "DATAFLOW" not in components):
+        data.drop(columns=["DATAFLOW"], inplace=True)
     if "STRUCTURE" in data.columns and data.columns[0] == "STRUCTURE":
         if "STRUCTURE" not in components:
             data.drop(columns=["STRUCTURE"], inplace=True)
@@ -133,9 +132,10 @@ def _pandas_load_s3_csv(components: Dict[str, Component], csv_path: str) -> pd.DataFrame:
 
 
 def _parse_boolean(value: str) -> bool:
-    if
-    return
-
+    if isinstance(value, bool):
+        return value
+    result = value.lower() == "true" or value == "1"
+    return result
 
 
 def _validate_pandas(
@@ -146,7 +146,7 @@ def _validate_pandas(
 
     id_names = [comp_name for comp_name, comp in components.items() if comp.role == Role.IDENTIFIER]
 
-    missing_columns = [name for name in components
+    missing_columns = [name for name in components if name not in data.columns.tolist()]
     if missing_columns:
         for name in missing_columns:
             if components[name].nullable is False:
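The rewritten `_parse_boolean` above short-circuits on real booleans and otherwise accepts "true" (case-insensitive) or "1". Copied out as a standalone snippet with a small demo (the real function lives in vtlengine/files/parser/__init__.py):

# Standalone copy of the new _parse_boolean logic from the hunk above.
def _parse_boolean(value):
    if isinstance(value, bool):
        return value
    result = value.lower() == "true" or value == "1"
    return result

# Tiny demo of accepted and rejected inputs.
for raw in (True, False, "true", "TRUE", "1", "0", "false"):
    print(repr(raw), "->", _parse_boolean(raw))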
vtlengine/files/parser/_time_checking.py
CHANGED
@@ -1,9 +1,8 @@
 import calendar
 import re
-from datetime import
+from datetime import date, datetime
 
 from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
-
 from vtlengine.Exceptions import InputValidationException
 
 
@@ -93,6 +92,8 @@ further_options_period_pattern = (
 
 
 def check_time_period(value: str) -> str:
+    if isinstance(value, int):
+        value = str(value)
     value = value.replace(" ", "")
     period_result = re.fullmatch(period_pattern, value)
     if period_result is not None:
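The guard added to `check_time_period` above coerces integer inputs (for example a bare year read from a CSV) to `str` before the regex match; the rest of the function is unchanged. A rough sketch of the effect, using a simplified, hypothetical stand-in for the real `period_pattern`:

import re

# Hypothetical simplification of period_pattern; the real pattern in
# _time_checking.py is more elaborate.
period_pattern = r"\d{4}(-?[AQSMWD]\d{1,3})?"

def check_time_period(value):
    if isinstance(value, int):  # added guard: integer input is coerced to str first
        value = str(value)
    value = value.replace(" ", "")
    if re.fullmatch(period_pattern, value) is None:
        raise ValueError(f"Invalid time period: {value}")
    return value

print(check_time_period(2022))      # "2022"
print(check_time_period("2022Q1"))  # "2022Q1"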
|