vtlengine 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +19 -8
- vtlengine/API/__init__.py +9 -9
- vtlengine/AST/ASTConstructor.py +23 -43
- vtlengine/AST/ASTConstructorModules/Expr.py +147 -71
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +104 -40
- vtlengine/AST/ASTConstructorModules/Terminals.py +28 -39
- vtlengine/AST/ASTTemplate.py +16 -1
- vtlengine/AST/DAG/__init__.py +12 -15
- vtlengine/AST/Grammar/Vtl.g4 +49 -20
- vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
- vtlengine/AST/Grammar/lexer.py +1293 -1183
- vtlengine/AST/Grammar/parser.py +5758 -3939
- vtlengine/AST/Grammar/tokens.py +12 -0
- vtlengine/AST/VtlVisitor.py +9 -2
- vtlengine/AST/__init__.py +21 -3
- vtlengine/DataTypes/TimeHandling.py +12 -7
- vtlengine/DataTypes/__init__.py +17 -24
- vtlengine/Exceptions/__init__.py +43 -1
- vtlengine/Exceptions/messages.py +82 -62
- vtlengine/Interpreter/__init__.py +125 -120
- vtlengine/Model/__init__.py +17 -12
- vtlengine/Operators/Aggregation.py +14 -14
- vtlengine/Operators/Analytic.py +56 -31
- vtlengine/Operators/Assignment.py +2 -3
- vtlengine/Operators/Boolean.py +5 -7
- vtlengine/Operators/CastOperator.py +12 -13
- vtlengine/Operators/Clause.py +11 -13
- vtlengine/Operators/Comparison.py +31 -17
- vtlengine/Operators/Conditional.py +157 -17
- vtlengine/Operators/General.py +4 -4
- vtlengine/Operators/HROperators.py +41 -34
- vtlengine/Operators/Join.py +18 -22
- vtlengine/Operators/Numeric.py +76 -39
- vtlengine/Operators/RoleSetter.py +6 -8
- vtlengine/Operators/Set.py +7 -12
- vtlengine/Operators/String.py +19 -27
- vtlengine/Operators/Time.py +366 -43
- vtlengine/Operators/Validation.py +4 -7
- vtlengine/Operators/__init__.py +38 -41
- vtlengine/Utils/__init__.py +149 -94
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +2 -2
- vtlengine/files/output/_time_period_representation.py +0 -1
- vtlengine/files/parser/__init__.py +18 -18
- vtlengine/files/parser/_time_checking.py +3 -2
- {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/METADATA +17 -5
- vtlengine-1.0.3.dist-info/RECORD +58 -0
- vtlengine-1.0.1.dist-info/RECORD +0 -58
- {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/LICENSE.md +0 -0
- {vtlengine-1.0.1.dist-info → vtlengine-1.0.3.dist-info}/WHEEL +0 -0
|
@@ -1,22 +1,19 @@
|
|
|
1
1
|
import operator
|
|
2
2
|
from copy import copy
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Any, Dict
|
|
4
4
|
|
|
5
|
-
import vtlengine.Operators as Operators
|
|
6
5
|
import pandas as pd
|
|
7
|
-
from vtlengine.DataTypes import Boolean, Number
|
|
8
6
|
from pandas import DataFrame
|
|
9
7
|
|
|
8
|
+
import vtlengine.Operators as Operators
|
|
10
9
|
from vtlengine.AST.Grammar.tokens import HIERARCHY
|
|
11
|
-
from vtlengine.
|
|
10
|
+
from vtlengine.DataTypes import Boolean, Number
|
|
11
|
+
from vtlengine.Model import Component, DataComponent, Dataset, Role
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def get_measure_from_dataset(dataset: Dataset, code_item: str) -> DataComponent:
|
|
15
15
|
measure_name = dataset.get_measures_names()[0]
|
|
16
|
-
if dataset.data is None
|
|
17
|
-
data = None
|
|
18
|
-
else:
|
|
19
|
-
data = dataset.data[measure_name]
|
|
16
|
+
data = None if dataset.data is None else dataset.data[measure_name]
|
|
20
17
|
return DataComponent(
|
|
21
18
|
name=code_item,
|
|
22
19
|
data=data,
|
|
@@ -35,33 +32,45 @@ class HRComparison(Operators.Binary):
|
|
|
35
32
|
return x - y
|
|
36
33
|
|
|
37
34
|
@staticmethod
|
|
38
|
-
def hr_func(
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
if hr_mode == "partial_null"
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
35
|
+
def hr_func(left_series: Any, right_series: Any, hr_mode: str) -> Any:
|
|
36
|
+
result = pd.Series(True, index=left_series.index)
|
|
37
|
+
|
|
38
|
+
if hr_mode in ("partial_null", "partial_zero"):
|
|
39
|
+
mask_remove = (right_series == "REMOVE_VALUE") & (right_series.notnull())
|
|
40
|
+
if hr_mode == "partial_null":
|
|
41
|
+
mask_null = mask_remove & left_series.notnull()
|
|
42
|
+
else:
|
|
43
|
+
mask_null = mask_remove & (left_series != 0)
|
|
44
|
+
result[mask_remove] = "REMOVE_VALUE"
|
|
45
|
+
result[mask_null] = None
|
|
46
|
+
elif hr_mode == "non_null":
|
|
47
|
+
mask_remove = left_series.isnull() | right_series.isnull()
|
|
48
|
+
result[mask_remove] = "REMOVE_VALUE"
|
|
52
49
|
elif hr_mode == "non_zero":
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
return "REMOVE_VALUE"
|
|
50
|
+
mask_remove = (left_series == 0) & (right_series == 0)
|
|
51
|
+
result[mask_remove] = "REMOVE_VALUE"
|
|
56
52
|
|
|
57
|
-
return
|
|
53
|
+
return result
|
|
58
54
|
|
|
59
55
|
@classmethod
|
|
60
56
|
def apply_hr_func(cls, left_series: Any, right_series: Any, hr_mode: str, func: Any) -> Any:
|
|
61
|
-
|
|
57
|
+
# In order not to apply the function to the whole series, we align the series
|
|
58
|
+
# and apply the function only to the valid values based on a validation mask.
|
|
59
|
+
# The function is applied to the aligned series and the result is combined with the
|
|
60
|
+
# original series.
|
|
61
|
+
left_series, right_series = left_series.align(right_series)
|
|
62
|
+
remove_result = cls.hr_func(left_series, right_series, hr_mode)
|
|
63
|
+
mask_valid = remove_result == True
|
|
64
|
+
result = pd.Series(remove_result, index=left_series.index)
|
|
65
|
+
result.loc[mask_valid] = left_series[mask_valid].combine(right_series[mask_valid], func)
|
|
66
|
+
return result
|
|
62
67
|
|
|
63
68
|
@classmethod
|
|
64
|
-
def validate(cls,
|
|
69
|
+
def validate(cls,
|
|
70
|
+
left_operand: Dataset,
|
|
71
|
+
right_operand: DataComponent,
|
|
72
|
+
hr_mode: str
|
|
73
|
+
) -> Dataset:
|
|
65
74
|
result_components = {
|
|
66
75
|
comp_name: copy(comp)
|
|
67
76
|
for comp_name, comp in left_operand.components.items()
|
|
@@ -80,12 +89,11 @@ class HRComparison(Operators.Binary):
|
|
|
80
89
|
)
|
|
81
90
|
|
|
82
91
|
@classmethod
|
|
83
|
-
def evaluate( # type: ignore[override]
|
|
84
|
-
cls, left: Dataset, right: DataComponent, hr_mode: str
|
|
85
|
-
) -> Dataset:
|
|
92
|
+
def evaluate(cls, left: Dataset, right: DataComponent, hr_mode: str) -> Dataset: # type: ignore[override]
|
|
86
93
|
result = cls.validate(left, right, hr_mode)
|
|
87
94
|
result.data = left.data.copy() if left.data is not None else pd.DataFrame()
|
|
88
95
|
measure_name = left.get_measures_names()[0]
|
|
96
|
+
|
|
89
97
|
if left.data is not None and right.data is not None:
|
|
90
98
|
result.data["bool_var"] = cls.apply_hr_func(
|
|
91
99
|
left.data[measure_name], right.data, hr_mode, cls.op_func
|
|
@@ -93,6 +101,7 @@ class HRComparison(Operators.Binary):
|
|
|
93
101
|
result.data["imbalance"] = cls.apply_hr_func(
|
|
94
102
|
left.data[measure_name], right.data, hr_mode, cls.imbalance_func
|
|
95
103
|
)
|
|
104
|
+
|
|
96
105
|
# Removing datapoints that should not be returned
|
|
97
106
|
# (we do it below imbalance calculation
|
|
98
107
|
# to avoid errors on different shape)
|
|
@@ -203,9 +212,7 @@ class HAAssignment(Operators.Binary):
|
|
|
203
212
|
def handle_mode(cls, x: Any, hr_mode: str) -> Any:
|
|
204
213
|
if not pd.isnull(x) and x == "REMOVE_VALUE":
|
|
205
214
|
return "REMOVE_VALUE"
|
|
206
|
-
if hr_mode == "non_null" and pd.isnull(x):
|
|
207
|
-
return "REMOVE_VALUE"
|
|
208
|
-
elif hr_mode == "non_zero" and x == 0:
|
|
215
|
+
if hr_mode == "non_null" and pd.isnull(x) or hr_mode == "non_zero" and x == 0:
|
|
209
216
|
return "REMOVE_VALUE"
|
|
210
217
|
return x
|
|
211
218
|
|
vtlengine/Operators/Join.py
CHANGED
|
@@ -1,11 +1,6 @@
|
|
|
1
1
|
from copy import copy
|
|
2
2
|
from functools import reduce
|
|
3
|
-
from typing import
|
|
4
|
-
|
|
5
|
-
from vtlengine.DataTypes import binary_implicit_promotion
|
|
6
|
-
|
|
7
|
-
from vtlengine.AST import BinOp
|
|
8
|
-
from vtlengine.Exceptions import SemanticError
|
|
3
|
+
from typing import Any, Dict, List, Optional
|
|
9
4
|
|
|
10
5
|
# if os.environ.get("SPARK"):
|
|
11
6
|
# import pyspark.pandas as pd
|
|
@@ -13,7 +8,10 @@ from vtlengine.Exceptions import SemanticError
|
|
|
13
8
|
# import pandas as pd
|
|
14
9
|
import pandas as pd
|
|
15
10
|
|
|
16
|
-
from vtlengine.
|
|
11
|
+
from vtlengine.AST import BinOp
|
|
12
|
+
from vtlengine.DataTypes import binary_implicit_promotion
|
|
13
|
+
from vtlengine.Exceptions import SemanticError
|
|
14
|
+
from vtlengine.Model import Component, Dataset, Role
|
|
17
15
|
from vtlengine.Operators import Operator, _id_type_promotion_join_keys
|
|
18
16
|
|
|
19
17
|
|
|
@@ -197,7 +195,7 @@ class Join(Operator):
|
|
|
197
195
|
def validate(cls, operands: List[Dataset], using: Optional[List[str]]) -> Dataset:
|
|
198
196
|
if len(operands) < 1 or sum([isinstance(op, Dataset) for op in operands]) < 1:
|
|
199
197
|
raise Exception("Join operator requires at least 1 dataset")
|
|
200
|
-
if not all(
|
|
198
|
+
if not all(isinstance(op, Dataset) for op in operands):
|
|
201
199
|
raise SemanticError("1-1-13-10")
|
|
202
200
|
if len(operands) == 1 and isinstance(operands[0], Dataset):
|
|
203
201
|
return Dataset(name="result", components=operands[0].components, data=None)
|
|
@@ -226,19 +224,17 @@ class Join(Operator):
|
|
|
226
224
|
raise SemanticError("1-1-13-14", op=cls.op, name=op_name)
|
|
227
225
|
|
|
228
226
|
for op_name, identifiers in info.items():
|
|
229
|
-
if op_name != cls.reference_dataset.name and not
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
component=missing_components[0],
|
|
241
|
-
)
|
|
227
|
+
if (using is None and op_name != cls.reference_dataset.name and not
|
|
228
|
+
set(identifiers).issubset(set(info[cls.reference_dataset.name]))):
|
|
229
|
+
missing_components = list(
|
|
230
|
+
set(identifiers) - set(info[cls.reference_dataset.name])
|
|
231
|
+
)
|
|
232
|
+
raise SemanticError(
|
|
233
|
+
"1-1-13-11",
|
|
234
|
+
op=cls.op,
|
|
235
|
+
dataset_reference=cls.reference_dataset.name,
|
|
236
|
+
component=missing_components[0],
|
|
237
|
+
)
|
|
242
238
|
if using is None:
|
|
243
239
|
return
|
|
244
240
|
|
|
@@ -256,7 +252,7 @@ class Join(Operator):
|
|
|
256
252
|
reference=cls.reference_dataset.name,
|
|
257
253
|
)
|
|
258
254
|
|
|
259
|
-
for
|
|
255
|
+
for _, identifiers in info.items():
|
|
260
256
|
if not set(using).issubset(identifiers):
|
|
261
257
|
# (Case B2)
|
|
262
258
|
if not set(using).issubset(reference_components):
|
vtlengine/Operators/Numeric.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
|
+
import _random
|
|
1
2
|
import math
|
|
2
3
|
import operator
|
|
3
|
-
|
|
4
|
+
import warnings
|
|
5
|
+
from decimal import Decimal, getcontext
|
|
4
6
|
from typing import Any, Optional, Union
|
|
5
7
|
|
|
6
|
-
import vtlengine.Operators as Operator
|
|
7
8
|
import pandas as pd
|
|
8
|
-
from vtlengine.DataTypes import Integer, Number
|
|
9
|
-
from vtlengine.Operators import ALL_MODEL_DATA_TYPES
|
|
10
9
|
|
|
10
|
+
import vtlengine.Operators as Operator
|
|
11
11
|
from vtlengine.AST.Grammar.tokens import (
|
|
12
12
|
ABS,
|
|
13
13
|
CEIL,
|
|
@@ -21,12 +21,15 @@ from vtlengine.AST.Grammar.tokens import (
|
|
|
21
21
|
MULT,
|
|
22
22
|
PLUS,
|
|
23
23
|
POWER,
|
|
24
|
+
RANDOM,
|
|
24
25
|
ROUND,
|
|
25
26
|
SQRT,
|
|
26
27
|
TRUNC,
|
|
27
28
|
)
|
|
29
|
+
from vtlengine.DataTypes import Integer, Number, binary_implicit_promotion
|
|
28
30
|
from vtlengine.Exceptions import SemanticError
|
|
29
31
|
from vtlengine.Model import DataComponent, Dataset, Scalar
|
|
32
|
+
from vtlengine.Operators import ALL_MODEL_DATA_TYPES
|
|
30
33
|
|
|
31
34
|
|
|
32
35
|
class Unary(Operator.Unary):
|
|
@@ -51,7 +54,8 @@ class Binary(Operator.Binary):
|
|
|
51
54
|
if isinstance(x, int) and isinstance(y, int):
|
|
52
55
|
if cls.op == DIV and y == 0:
|
|
53
56
|
raise SemanticError("2-1-15-6", op=cls.op, value=y)
|
|
54
|
-
|
|
57
|
+
if cls.op == RANDOM:
|
|
58
|
+
return cls.py_op(x, y)
|
|
55
59
|
x = float(x)
|
|
56
60
|
y = float(y)
|
|
57
61
|
# Handles precision to avoid floating point errors
|
|
@@ -68,8 +72,8 @@ class Binary(Operator.Binary):
|
|
|
68
72
|
|
|
69
73
|
class UnPlus(Unary):
|
|
70
74
|
"""
|
|
71
|
-
`Plus <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=94&zoom=100,72,142> `_ unary operator
|
|
72
|
-
"""
|
|
75
|
+
`Plus <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=94&zoom=100,72,142> `_ unary operator
|
|
76
|
+
""" # noqa E501
|
|
73
77
|
|
|
74
78
|
op = PLUS
|
|
75
79
|
py_op = operator.pos
|
|
@@ -81,8 +85,8 @@ class UnPlus(Unary):
|
|
|
81
85
|
|
|
82
86
|
class UnMinus(Unary):
|
|
83
87
|
"""
|
|
84
|
-
`Minus <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=95&zoom=100,72,414> `_unary operator
|
|
85
|
-
"""
|
|
88
|
+
`Minus <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=95&zoom=100,72,414> `_unary operator
|
|
89
|
+
""" # noqa E501
|
|
86
90
|
|
|
87
91
|
op = MINUS
|
|
88
92
|
py_op = operator.neg
|
|
@@ -90,8 +94,8 @@ class UnMinus(Unary):
|
|
|
90
94
|
|
|
91
95
|
class AbsoluteValue(Unary):
|
|
92
96
|
"""
|
|
93
|
-
`Absolute <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=112&zoom=100,72,801> `_ unary operator
|
|
94
|
-
"""
|
|
97
|
+
`Absolute <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=112&zoom=100,72,801> `_ unary operator
|
|
98
|
+
""" # noqa E501
|
|
95
99
|
|
|
96
100
|
op = ABS
|
|
97
101
|
py_op = operator.abs
|
|
@@ -99,8 +103,8 @@ class AbsoluteValue(Unary):
|
|
|
99
103
|
|
|
100
104
|
class Exponential(Unary):
|
|
101
105
|
"""
|
|
102
|
-
`Exponential <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=114&zoom=100,72,94>`_ unary operator
|
|
103
|
-
"""
|
|
106
|
+
`Exponential <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=114&zoom=100,72,94>`_ unary operator
|
|
107
|
+
""" # noqa E501
|
|
104
108
|
|
|
105
109
|
op = EXP
|
|
106
110
|
py_op = math.exp
|
|
@@ -109,9 +113,9 @@ class Exponential(Unary):
|
|
|
109
113
|
|
|
110
114
|
class NaturalLogarithm(Unary):
|
|
111
115
|
"""
|
|
112
|
-
`Natural logarithm <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=115&zoom=100,72,394> `_
|
|
116
|
+
`Natural logarithm <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=115&zoom=100,72,394> `_
|
|
113
117
|
unary operator
|
|
114
|
-
"""
|
|
118
|
+
""" # noqa E501
|
|
115
119
|
|
|
116
120
|
op = LN
|
|
117
121
|
py_op = math.log
|
|
@@ -120,9 +124,9 @@ class NaturalLogarithm(Unary):
|
|
|
120
124
|
|
|
121
125
|
class SquareRoot(Unary):
|
|
122
126
|
"""
|
|
123
|
-
`Square Root <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=119&zoom=100,72,556> '_
|
|
127
|
+
`Square Root <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=119&zoom=100,72,556> '_
|
|
124
128
|
unary operator
|
|
125
|
-
"""
|
|
129
|
+
""" # noqa E501
|
|
126
130
|
|
|
127
131
|
op = SQRT
|
|
128
132
|
py_op = math.sqrt
|
|
@@ -131,8 +135,8 @@ class SquareRoot(Unary):
|
|
|
131
135
|
|
|
132
136
|
class Ceil(Unary):
|
|
133
137
|
"""
|
|
134
|
-
`Ceilling <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=110&zoom=100,72,94> `_ unary operator
|
|
135
|
-
"""
|
|
138
|
+
`Ceilling <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=110&zoom=100,72,94> `_ unary operator
|
|
139
|
+
""" # noqa E501
|
|
136
140
|
|
|
137
141
|
op = CEIL
|
|
138
142
|
py_op = math.ceil
|
|
@@ -141,8 +145,8 @@ class Ceil(Unary):
|
|
|
141
145
|
|
|
142
146
|
class Floor(Unary):
|
|
143
147
|
"""
|
|
144
|
-
`Floor <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=111&zoom=100,72,442> `_ unary operator
|
|
145
|
-
"""
|
|
148
|
+
`Floor <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=111&zoom=100,72,442> `_ unary operator
|
|
149
|
+
""" # noqa E501
|
|
146
150
|
|
|
147
151
|
op = FLOOR
|
|
148
152
|
py_op = math.floor
|
|
@@ -151,8 +155,8 @@ class Floor(Unary):
|
|
|
151
155
|
|
|
152
156
|
class BinPlus(Binary):
|
|
153
157
|
"""
|
|
154
|
-
`Addition <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=96&zoom=100,72,692> `_ binary operator
|
|
155
|
-
"""
|
|
158
|
+
`Addition <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=96&zoom=100,72,692> `_ binary operator
|
|
159
|
+
""" # noqa E501
|
|
156
160
|
|
|
157
161
|
op = PLUS
|
|
158
162
|
py_op = operator.add
|
|
@@ -161,8 +165,8 @@ class BinPlus(Binary):
|
|
|
161
165
|
|
|
162
166
|
class BinMinus(Binary):
|
|
163
167
|
"""
|
|
164
|
-
`Subtraction <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=98&zoom=100,72,448> `_ binary operator
|
|
165
|
-
"""
|
|
168
|
+
`Subtraction <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=98&zoom=100,72,448> `_ binary operator
|
|
169
|
+
""" # noqa E501
|
|
166
170
|
|
|
167
171
|
op = MINUS
|
|
168
172
|
py_op = operator.sub
|
|
@@ -171,9 +175,9 @@ class BinMinus(Binary):
|
|
|
171
175
|
|
|
172
176
|
class Mult(Binary):
|
|
173
177
|
"""
|
|
174
|
-
`Multiplication <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=100&zoom=100,72,254>`_
|
|
178
|
+
`Multiplication <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=100&zoom=100,72,254>`_
|
|
175
179
|
binary operator
|
|
176
|
-
"""
|
|
180
|
+
""" # noqa E501
|
|
177
181
|
|
|
178
182
|
op = MULT
|
|
179
183
|
py_op = operator.mul
|
|
@@ -181,9 +185,9 @@ class Mult(Binary):
|
|
|
181
185
|
|
|
182
186
|
class Div(Binary):
|
|
183
187
|
"""
|
|
184
|
-
`Division <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=102&zoom=100,72,94>`_
|
|
188
|
+
`Division <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=102&zoom=100,72,94>`_
|
|
185
189
|
binary operator
|
|
186
|
-
"""
|
|
190
|
+
""" # noqa E501
|
|
187
191
|
|
|
188
192
|
op = DIV
|
|
189
193
|
py_op = operator.truediv
|
|
@@ -192,8 +196,8 @@ class Div(Binary):
|
|
|
192
196
|
|
|
193
197
|
class Logarithm(Binary):
|
|
194
198
|
"""
|
|
195
|
-
`Logarithm <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=118&zoom=100,72,228>`_ operator
|
|
196
|
-
"""
|
|
199
|
+
`Logarithm <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=118&zoom=100,72,228>`_ operator
|
|
200
|
+
""" # noqa E501
|
|
197
201
|
|
|
198
202
|
op = LOG
|
|
199
203
|
return_type = Number
|
|
@@ -210,8 +214,8 @@ class Logarithm(Binary):
|
|
|
210
214
|
|
|
211
215
|
class Modulo(Binary):
|
|
212
216
|
"""
|
|
213
|
-
`Module <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=104&zoom=100,72,94>`_ operator
|
|
214
|
-
"""
|
|
217
|
+
`Module <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=104&zoom=100,72,94>`_ operator
|
|
218
|
+
""" # noqa E501
|
|
215
219
|
|
|
216
220
|
op = MOD
|
|
217
221
|
py_op = operator.mod
|
|
@@ -219,8 +223,8 @@ class Modulo(Binary):
|
|
|
219
223
|
|
|
220
224
|
class Power(Binary):
|
|
221
225
|
"""
|
|
222
|
-
`Power <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=116&zoom=100,72,693>`_ operator
|
|
223
|
-
"""
|
|
226
|
+
`Power <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=116&zoom=100,72,693>`_ operator
|
|
227
|
+
""" # noqa E501
|
|
224
228
|
|
|
225
229
|
op = POWER
|
|
226
230
|
return_type = Number
|
|
@@ -335,8 +339,8 @@ class Parameterized(Unary):
|
|
|
335
339
|
|
|
336
340
|
class Round(Parameterized):
|
|
337
341
|
"""
|
|
338
|
-
`Round <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=106&zoom=100,72,94>`_ operator
|
|
339
|
-
"""
|
|
342
|
+
`Round <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=106&zoom=100,72,94>`_ operator
|
|
343
|
+
""" # noqa E501
|
|
340
344
|
|
|
341
345
|
op = ROUND
|
|
342
346
|
return_type = Integer
|
|
@@ -360,8 +364,8 @@ class Round(Parameterized):
|
|
|
360
364
|
|
|
361
365
|
class Trunc(Parameterized):
|
|
362
366
|
"""
|
|
363
|
-
`Trunc <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=108&zoom=100,72,94>`_ operator.
|
|
364
|
-
"""
|
|
367
|
+
`Trunc <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=108&zoom=100,72,94>`_ operator.
|
|
368
|
+
""" # noqa E501
|
|
365
369
|
|
|
366
370
|
op = TRUNC
|
|
367
371
|
|
|
@@ -377,3 +381,36 @@ class Trunc(Parameterized):
|
|
|
377
381
|
return truncated_value
|
|
378
382
|
|
|
379
383
|
return int(truncated_value)
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
class PseudoRandom(_random.Random):
|
|
387
|
+
|
|
388
|
+
def __init__(self, seed: Union[int, float]) -> None:
|
|
389
|
+
super().__init__()
|
|
390
|
+
self.seed(seed)
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
class Random(Parameterized):
|
|
394
|
+
|
|
395
|
+
op = RANDOM
|
|
396
|
+
return_type = Number
|
|
397
|
+
|
|
398
|
+
@classmethod
|
|
399
|
+
def validate(cls, seed: Any, index: Any = None) -> Any:
|
|
400
|
+
if index.data_type != Integer:
|
|
401
|
+
index.data_type = binary_implicit_promotion(index.data_type, Integer)
|
|
402
|
+
if index.value < 0:
|
|
403
|
+
raise SemanticError("2-1-15-2", op=cls.op, value=index)
|
|
404
|
+
if index.value > 10000:
|
|
405
|
+
warnings.warn(
|
|
406
|
+
"Random: The value of 'index' is very big. This can affect " "performance.",
|
|
407
|
+
UserWarning,
|
|
408
|
+
)
|
|
409
|
+
return super().validate(seed, index)
|
|
410
|
+
|
|
411
|
+
@classmethod
|
|
412
|
+
def py_op(cls, seed: Union[int, float], index: int) -> float:
|
|
413
|
+
instance: PseudoRandom = PseudoRandom(seed)
|
|
414
|
+
for _ in range(index):
|
|
415
|
+
instance.random()
|
|
416
|
+
return instance.random().__round__(6)
|
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
from copy import copy
|
|
2
2
|
from typing import Any, Union
|
|
3
3
|
|
|
4
|
-
from vtlengine.Exceptions import SemanticError
|
|
5
|
-
|
|
6
4
|
# if os.environ.get("SPARK", False):
|
|
7
5
|
# import pyspark.pandas as pd
|
|
8
6
|
# else:
|
|
9
7
|
# import pandas as pd
|
|
10
8
|
import pandas as pd
|
|
11
9
|
|
|
10
|
+
from vtlengine.Exceptions import SemanticError
|
|
12
11
|
from vtlengine.Model import DataComponent, Role, Scalar
|
|
13
12
|
from vtlengine.Operators import Unary
|
|
14
13
|
|
|
@@ -36,9 +35,9 @@ class RoleSetter(Unary):
|
|
|
36
35
|
|
|
37
36
|
@classmethod
|
|
38
37
|
def evaluate(cls, operand: Any, data_size: int = 0) -> DataComponent:
|
|
39
|
-
if isinstance(operand, DataComponent) and operand.data is not None
|
|
40
|
-
|
|
41
|
-
|
|
38
|
+
if (isinstance(operand, DataComponent) and operand.data is not None and
|
|
39
|
+
not operand.nullable and any(operand.data.isnull())):
|
|
40
|
+
raise SemanticError("1-1-1-16")
|
|
42
41
|
result = cls.validate(operand, data_size)
|
|
43
42
|
if isinstance(operand, Scalar):
|
|
44
43
|
result.data = pd.Series([operand.value] * data_size, dtype=object)
|
|
@@ -61,9 +60,8 @@ class Identifier(RoleSetter):
|
|
|
61
60
|
def evaluate( # type: ignore[override]
|
|
62
61
|
cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0
|
|
63
62
|
) -> DataComponent:
|
|
64
|
-
if isinstance(operand, Scalar):
|
|
65
|
-
|
|
66
|
-
raise SemanticError("1-1-1-16")
|
|
63
|
+
if isinstance(operand, Scalar) and operand.value is None:
|
|
64
|
+
raise SemanticError("1-1-1-16")
|
|
67
65
|
return super().evaluate(operand, data_size)
|
|
68
66
|
|
|
69
67
|
|
vtlengine/Operators/Set.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
from typing import
|
|
2
|
-
|
|
3
|
-
from vtlengine.Exceptions import SemanticError
|
|
1
|
+
from typing import Any, Dict, List
|
|
4
2
|
|
|
5
3
|
# if os.environ.get("SPARK"):
|
|
6
4
|
# import pyspark.pandas as pd
|
|
@@ -8,9 +6,10 @@ from vtlengine.Exceptions import SemanticError
|
|
|
8
6
|
# import pandas as pd
|
|
9
7
|
import pandas as pd
|
|
10
8
|
|
|
9
|
+
from vtlengine.DataTypes import binary_implicit_promotion
|
|
10
|
+
from vtlengine.Exceptions import SemanticError
|
|
11
11
|
from vtlengine.Model import Dataset
|
|
12
12
|
from vtlengine.Operators import Operator
|
|
13
|
-
from vtlengine.DataTypes import binary_implicit_promotion
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
class Set(Operator):
|
|
@@ -87,9 +86,7 @@ class Intersection(Set):
|
|
|
87
86
|
data, how="inner", on=result.get_identifiers_names()
|
|
88
87
|
)
|
|
89
88
|
|
|
90
|
-
not_identifiers =
|
|
91
|
-
col for col in result.get_measures_names() + result.get_attributes_names()
|
|
92
|
-
]
|
|
89
|
+
not_identifiers = result.get_measures_names() + result.get_attributes_names()
|
|
93
90
|
|
|
94
91
|
for col in not_identifiers:
|
|
95
92
|
result.data[col] = result.data[col + "_x"]
|
|
@@ -120,8 +117,8 @@ class Symdiff(Set):
|
|
|
120
117
|
result.data["_merge"] = result.data.apply(
|
|
121
118
|
lambda row: (
|
|
122
119
|
"left_only"
|
|
123
|
-
if pd.isnull(row[measure
|
|
124
|
-
else ("right_only" if pd.isnull(row[measure
|
|
120
|
+
if pd.isnull(row[f"{measure}_y"])
|
|
121
|
+
else ("right_only" if pd.isnull(row[f"{measure}_x"]) else "both")
|
|
125
122
|
),
|
|
126
123
|
axis=1,
|
|
127
124
|
)
|
|
@@ -162,9 +159,7 @@ class Setdiff(Set):
|
|
|
162
159
|
if len(result.data) > 0:
|
|
163
160
|
result.data = result.data[result.data.apply(cls.has_null, axis=1)]
|
|
164
161
|
|
|
165
|
-
not_identifiers =
|
|
166
|
-
col for col in result.get_measures_names() + result.get_attributes_names()
|
|
167
|
-
]
|
|
162
|
+
not_identifiers = result.get_measures_names() + result.get_attributes_names()
|
|
168
163
|
for col in not_identifiers:
|
|
169
164
|
if col + "_x" in result.data:
|
|
170
165
|
result.data[col] = result.data[col + "_x"]
|
vtlengine/Operators/String.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import operator
|
|
2
2
|
import re
|
|
3
|
-
from
|
|
4
|
-
from vtlengine.Model import DataComponent, Dataset, Scalar
|
|
3
|
+
from typing import Any, Optional, Union
|
|
5
4
|
|
|
6
5
|
# if os.environ.get("SPARK", False):
|
|
7
6
|
# import pyspark.pandas as pd
|
|
@@ -9,21 +8,22 @@ from vtlengine.Model import DataComponent, Dataset, Scalar
|
|
|
9
8
|
# import pandas as pd
|
|
10
9
|
import pandas as pd
|
|
11
10
|
|
|
12
|
-
|
|
11
|
+
import vtlengine.Operators as Operator
|
|
13
12
|
from vtlengine.AST.Grammar.tokens import (
|
|
14
|
-
LEN,
|
|
15
13
|
CONCAT,
|
|
16
|
-
|
|
14
|
+
INSTR,
|
|
17
15
|
LCASE,
|
|
16
|
+
LEN,
|
|
17
|
+
LTRIM,
|
|
18
|
+
REPLACE,
|
|
18
19
|
RTRIM,
|
|
19
20
|
SUBSTR,
|
|
20
|
-
LTRIM,
|
|
21
21
|
TRIM,
|
|
22
|
-
|
|
23
|
-
INSTR,
|
|
22
|
+
UCASE,
|
|
24
23
|
)
|
|
25
24
|
from vtlengine.DataTypes import Integer, String, check_unary_implicit_promotion
|
|
26
|
-
|
|
25
|
+
from vtlengine.Exceptions import SemanticError
|
|
26
|
+
from vtlengine.Model import DataComponent, Dataset, Scalar
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
class Unary(Operator.Unary):
|
|
@@ -280,10 +280,7 @@ class Substr(Parameterized):
|
|
|
280
280
|
param1 -= 1
|
|
281
281
|
elif param1 > (len(x)):
|
|
282
282
|
return ""
|
|
283
|
-
if param2 is None or
|
|
284
|
-
param2 = len(x)
|
|
285
|
-
else:
|
|
286
|
-
param2 = param1 + param2
|
|
283
|
+
param2 = len(x) if param2 is None or param1 + param2 > len(x) else param1 + param2
|
|
287
284
|
return x[param1:param2]
|
|
288
285
|
|
|
289
286
|
@classmethod
|
|
@@ -408,14 +405,12 @@ class Instr(Parameterized):
|
|
|
408
405
|
|
|
409
406
|
@classmethod
|
|
410
407
|
def check_param_value(cls, param: Any, position: int) -> None:
|
|
411
|
-
if position == 2:
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
"1-1-18-4", op=cls.op, param_type="Occurrence", correct_type=">= 1"
|
|
418
|
-
)
|
|
408
|
+
if position == 2 and not pd.isnull(param) and param < 1:
|
|
409
|
+
raise SemanticError("1-1-18-4", op=cls.op, param_type="Start", correct_type=">= 1")
|
|
410
|
+
elif position == 3 and not pd.isnull(param) and param < 1:
|
|
411
|
+
raise SemanticError(
|
|
412
|
+
"1-1-18-4", op=cls.op, param_type="Occurrence", correct_type=">= 1"
|
|
413
|
+
)
|
|
419
414
|
|
|
420
415
|
@classmethod
|
|
421
416
|
def apply_operation_series_scalar(
|
|
@@ -549,7 +544,7 @@ class Instr(Parameterized):
|
|
|
549
544
|
) -> Any:
|
|
550
545
|
str_value = str(str_value)
|
|
551
546
|
if not pd.isnull(start):
|
|
552
|
-
if isinstance(start, int
|
|
547
|
+
if isinstance(start, (int, float)):
|
|
553
548
|
start = int(start - 1)
|
|
554
549
|
else:
|
|
555
550
|
# OPERATORS_STRINGOPERATORS.92
|
|
@@ -560,7 +555,7 @@ class Instr(Parameterized):
|
|
|
560
555
|
start = 0
|
|
561
556
|
|
|
562
557
|
if not pd.isnull(occurrence):
|
|
563
|
-
if isinstance(occurrence, int
|
|
558
|
+
if isinstance(occurrence, (int, float)):
|
|
564
559
|
occurrence = int(occurrence - 1)
|
|
565
560
|
else:
|
|
566
561
|
# OPERATORS_STRINGOPERATORS.93
|
|
@@ -578,9 +573,6 @@ class Instr(Parameterized):
|
|
|
578
573
|
|
|
579
574
|
length = len(occurrences_list)
|
|
580
575
|
|
|
581
|
-
if occurrence > length - 1
|
|
582
|
-
position = 0
|
|
583
|
-
else:
|
|
584
|
-
position = int(start + occurrences_list[occurrence] + 1)
|
|
576
|
+
position = 0 if occurrence > length - 1 else int(start + occurrences_list[occurrence] + 1)
|
|
585
577
|
|
|
586
578
|
return position
|