vtlengine 1.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +153 -100
- vtlengine/API/__init__.py +109 -67
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +8 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/lexer.py +732 -142
- vtlengine/AST/Grammar/parser.py +2188 -826
- vtlengine/AST/Grammar/tokens.py +128 -128
- vtlengine/AST/VtlVisitor.py +7 -4
- vtlengine/AST/__init__.py +22 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +194 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +52 -27
- vtlengine/Exceptions/messages.py +134 -62
- vtlengine/Interpreter/__init__.py +781 -487
- vtlengine/Model/__init__.py +165 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +115 -59
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +149 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +89 -44
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +334 -216
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +195 -40
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +77 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +46 -37
- vtlengine-1.0.1.dist-info/METADATA +236 -0
- vtlengine-1.0.1.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
vtlengine/Operators/Numeric.py
CHANGED
|
@@ -8,9 +8,23 @@ import pandas as pd
|
|
|
8
8
|
from vtlengine.DataTypes import Integer, Number
|
|
9
9
|
from vtlengine.Operators import ALL_MODEL_DATA_TYPES
|
|
10
10
|
|
|
11
|
-
from vtlengine.AST.Grammar.tokens import
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
from vtlengine.AST.Grammar.tokens import (
|
|
12
|
+
ABS,
|
|
13
|
+
CEIL,
|
|
14
|
+
DIV,
|
|
15
|
+
EXP,
|
|
16
|
+
FLOOR,
|
|
17
|
+
LN,
|
|
18
|
+
LOG,
|
|
19
|
+
MINUS,
|
|
20
|
+
MOD,
|
|
21
|
+
MULT,
|
|
22
|
+
PLUS,
|
|
23
|
+
POWER,
|
|
24
|
+
ROUND,
|
|
25
|
+
SQRT,
|
|
26
|
+
TRUNC,
|
|
27
|
+
)
|
|
14
28
|
from vtlengine.Exceptions import SemanticError
|
|
15
29
|
from vtlengine.Model import DataComponent, Dataset, Scalar
|
|
16
30
|
|
|
@@ -19,6 +33,7 @@ class Unary(Operator.Unary):
|
|
|
19
33
|
"""
|
|
20
34
|
Checks that the unary operation is performed with a number.
|
|
21
35
|
"""
|
|
36
|
+
|
|
22
37
|
type_to_check = Number
|
|
23
38
|
|
|
24
39
|
|
|
@@ -26,6 +41,7 @@ class Binary(Operator.Binary):
|
|
|
26
41
|
"""
|
|
27
42
|
Checks that the binary operation is performed with numbers.
|
|
28
43
|
"""
|
|
44
|
+
|
|
29
45
|
type_to_check = Number
|
|
30
46
|
|
|
31
47
|
@classmethod
|
|
@@ -52,8 +68,9 @@ class Binary(Operator.Binary):
|
|
|
52
68
|
|
|
53
69
|
class UnPlus(Unary):
|
|
54
70
|
"""
|
|
55
|
-
`Plus <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=94&zoom=100,72,142> `_ unary operator
|
|
71
|
+
`Plus <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=94&zoom=100,72,142> `_ unary operator # noqa E501
|
|
56
72
|
"""
|
|
73
|
+
|
|
57
74
|
op = PLUS
|
|
58
75
|
py_op = operator.pos
|
|
59
76
|
|
|
@@ -64,24 +81,27 @@ class UnPlus(Unary):
|
|
|
64
81
|
|
|
65
82
|
class UnMinus(Unary):
|
|
66
83
|
"""
|
|
67
|
-
`Minus <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=95&zoom=100,72,414> `_unary operator
|
|
84
|
+
`Minus <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=95&zoom=100,72,414> `_unary operator # noqa E501
|
|
68
85
|
"""
|
|
86
|
+
|
|
69
87
|
op = MINUS
|
|
70
88
|
py_op = operator.neg
|
|
71
89
|
|
|
72
90
|
|
|
73
91
|
class AbsoluteValue(Unary):
|
|
74
92
|
"""
|
|
75
|
-
`Absolute <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=112&zoom=100,72,801> `_ unary operator
|
|
93
|
+
`Absolute <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=112&zoom=100,72,801> `_ unary operator # noqa E501
|
|
76
94
|
"""
|
|
95
|
+
|
|
77
96
|
op = ABS
|
|
78
97
|
py_op = operator.abs
|
|
79
98
|
|
|
80
99
|
|
|
81
100
|
class Exponential(Unary):
|
|
82
101
|
"""
|
|
83
|
-
`Exponential <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=114&zoom=100,72,94>`_ unary operator
|
|
102
|
+
`Exponential <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=114&zoom=100,72,94>`_ unary operator # noqa E501
|
|
84
103
|
"""
|
|
104
|
+
|
|
85
105
|
op = EXP
|
|
86
106
|
py_op = math.exp
|
|
87
107
|
return_type = Number
|
|
@@ -89,9 +109,10 @@ class Exponential(Unary):
|
|
|
89
109
|
|
|
90
110
|
class NaturalLogarithm(Unary):
|
|
91
111
|
"""
|
|
92
|
-
`Natural logarithm <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=115&zoom=100,72,394> `_
|
|
112
|
+
`Natural logarithm <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=115&zoom=100,72,394> `_ # noqa E501
|
|
93
113
|
unary operator
|
|
94
114
|
"""
|
|
115
|
+
|
|
95
116
|
op = LN
|
|
96
117
|
py_op = math.log
|
|
97
118
|
return_type = Number
|
|
@@ -99,9 +120,10 @@ class NaturalLogarithm(Unary):
|
|
|
99
120
|
|
|
100
121
|
class SquareRoot(Unary):
|
|
101
122
|
"""
|
|
102
|
-
`Square Root <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=119&zoom=100,72,556> '_
|
|
123
|
+
`Square Root <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=119&zoom=100,72,556> '_ # noqa E501
|
|
103
124
|
unary operator
|
|
104
125
|
"""
|
|
126
|
+
|
|
105
127
|
op = SQRT
|
|
106
128
|
py_op = math.sqrt
|
|
107
129
|
return_type = Number
|
|
@@ -109,8 +131,9 @@ class SquareRoot(Unary):
|
|
|
109
131
|
|
|
110
132
|
class Ceil(Unary):
|
|
111
133
|
"""
|
|
112
|
-
`Ceilling <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=110&zoom=100,72,94> `_ unary operator
|
|
134
|
+
`Ceilling <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=110&zoom=100,72,94> `_ unary operator # noqa E501
|
|
113
135
|
"""
|
|
136
|
+
|
|
114
137
|
op = CEIL
|
|
115
138
|
py_op = math.ceil
|
|
116
139
|
return_type = Integer
|
|
@@ -118,8 +141,9 @@ class Ceil(Unary):
|
|
|
118
141
|
|
|
119
142
|
class Floor(Unary):
|
|
120
143
|
"""
|
|
121
|
-
`Floor <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=111&zoom=100,72,442> `_ unary operator
|
|
144
|
+
`Floor <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=111&zoom=100,72,442> `_ unary operator # noqa E501
|
|
122
145
|
"""
|
|
146
|
+
|
|
123
147
|
op = FLOOR
|
|
124
148
|
py_op = math.floor
|
|
125
149
|
return_type = Integer
|
|
@@ -127,8 +151,9 @@ class Floor(Unary):
|
|
|
127
151
|
|
|
128
152
|
class BinPlus(Binary):
|
|
129
153
|
"""
|
|
130
|
-
`Addition <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=96&zoom=100,72,692> `_ binary operator
|
|
154
|
+
`Addition <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=96&zoom=100,72,692> `_ binary operator # noqa E501
|
|
131
155
|
"""
|
|
156
|
+
|
|
132
157
|
op = PLUS
|
|
133
158
|
py_op = operator.add
|
|
134
159
|
type_to_check = Number
|
|
@@ -136,8 +161,9 @@ class BinPlus(Binary):
|
|
|
136
161
|
|
|
137
162
|
class BinMinus(Binary):
|
|
138
163
|
"""
|
|
139
|
-
`Subtraction <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=98&zoom=100,72,448> `_ binary operator
|
|
164
|
+
`Subtraction <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=98&zoom=100,72,448> `_ binary operator # noqa E501
|
|
140
165
|
"""
|
|
166
|
+
|
|
141
167
|
op = MINUS
|
|
142
168
|
py_op = operator.sub
|
|
143
169
|
type_to_check = Number
|
|
@@ -145,18 +171,20 @@ class BinMinus(Binary):
|
|
|
145
171
|
|
|
146
172
|
class Mult(Binary):
|
|
147
173
|
"""
|
|
148
|
-
`Multiplication <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=100&zoom=100,72,254>`_
|
|
174
|
+
`Multiplication <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=100&zoom=100,72,254>`_ # noqa E501
|
|
149
175
|
binary operator
|
|
150
176
|
"""
|
|
177
|
+
|
|
151
178
|
op = MULT
|
|
152
179
|
py_op = operator.mul
|
|
153
180
|
|
|
154
181
|
|
|
155
182
|
class Div(Binary):
|
|
156
183
|
"""
|
|
157
|
-
`Division <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=102&zoom=100,72,94>`_
|
|
184
|
+
`Division <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=102&zoom=100,72,94>`_ # noqa E501
|
|
158
185
|
binary operator
|
|
159
186
|
"""
|
|
187
|
+
|
|
160
188
|
op = DIV
|
|
161
189
|
py_op = operator.truediv
|
|
162
190
|
return_type = Number
|
|
@@ -164,8 +192,9 @@ class Div(Binary):
|
|
|
164
192
|
|
|
165
193
|
class Logarithm(Binary):
|
|
166
194
|
"""
|
|
167
|
-
`Logarithm <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=118&zoom=100,72,228>`_ operator
|
|
195
|
+
`Logarithm <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=118&zoom=100,72,228>`_ operator # noqa E501
|
|
168
196
|
"""
|
|
197
|
+
|
|
169
198
|
op = LOG
|
|
170
199
|
return_type = Number
|
|
171
200
|
|
|
@@ -181,16 +210,18 @@ class Logarithm(Binary):
|
|
|
181
210
|
|
|
182
211
|
class Modulo(Binary):
|
|
183
212
|
"""
|
|
184
|
-
`Module <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=104&zoom=100,72,94>`_ operator
|
|
213
|
+
`Module <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=104&zoom=100,72,94>`_ operator # noqa E501
|
|
185
214
|
"""
|
|
215
|
+
|
|
186
216
|
op = MOD
|
|
187
217
|
py_op = operator.mod
|
|
188
218
|
|
|
189
219
|
|
|
190
220
|
class Power(Binary):
|
|
191
221
|
"""
|
|
192
|
-
`Power <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=116&zoom=100,72,693>`_ operator
|
|
222
|
+
`Power <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=116&zoom=100,72,693>`_ operator # noqa E501
|
|
193
223
|
"""
|
|
224
|
+
|
|
194
225
|
op = POWER
|
|
195
226
|
return_type = Number
|
|
196
227
|
|
|
@@ -198,26 +229,30 @@ class Power(Binary):
|
|
|
198
229
|
def py_op(cls, x: Any, param: Any) -> Any:
|
|
199
230
|
if pd.isnull(param):
|
|
200
231
|
return None
|
|
201
|
-
return x
|
|
232
|
+
return x**param
|
|
202
233
|
|
|
203
234
|
|
|
204
235
|
class Parameterized(Unary):
|
|
205
236
|
"""Parametrized class
|
|
206
|
-
|
|
207
|
-
|
|
237
|
+
Inherits from Unary class, to validate the data type and evaluate if it is the correct one to
|
|
238
|
+
perform the operation. Similar to Unary, but in the end, the param validation is added.
|
|
208
239
|
"""
|
|
209
240
|
|
|
210
241
|
@classmethod
|
|
211
|
-
def validate(
|
|
212
|
-
|
|
242
|
+
def validate(
|
|
243
|
+
cls,
|
|
244
|
+
operand: Operator.ALL_MODEL_DATA_TYPES,
|
|
245
|
+
param: Optional[Union[DataComponent, Scalar]] = None,
|
|
246
|
+
) -> Any:
|
|
213
247
|
|
|
214
248
|
if param is not None:
|
|
215
249
|
if isinstance(param, Dataset):
|
|
216
250
|
raise SemanticError("1-1-15-8", op=cls.op, comp_type="Dataset")
|
|
217
251
|
if isinstance(param, DataComponent):
|
|
218
252
|
if isinstance(operand, Scalar):
|
|
219
|
-
raise SemanticError(
|
|
220
|
-
|
|
253
|
+
raise SemanticError(
|
|
254
|
+
"1-1-15-8", op=cls.op, comp_type="DataComponent and an Scalar operand"
|
|
255
|
+
)
|
|
221
256
|
cls.validate_type_compatibility(param.data_type)
|
|
222
257
|
else:
|
|
223
258
|
cls.validate_scalar_type(param)
|
|
@@ -233,17 +268,19 @@ class Parameterized(Unary):
|
|
|
233
268
|
return None if pd.isnull(x) else cls.py_op(x, param)
|
|
234
269
|
|
|
235
270
|
@classmethod
|
|
236
|
-
def apply_operation_two_series(cls, left_series:
|
|
271
|
+
def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
|
|
237
272
|
return left_series.combine(right_series, cls.op_func)
|
|
238
273
|
|
|
239
274
|
@classmethod
|
|
240
|
-
def apply_operation_series_scalar(cls, series:
|
|
275
|
+
def apply_operation_series_scalar(cls, series: Any, param: Any) -> Any:
|
|
241
276
|
return series.map(lambda x: cls.op_func(x, param))
|
|
242
277
|
|
|
243
278
|
@classmethod
|
|
244
|
-
def dataset_evaluation(
|
|
279
|
+
def dataset_evaluation(
|
|
280
|
+
cls, operand: Dataset, param: Optional[Union[DataComponent, Scalar]] = None
|
|
281
|
+
) -> Dataset:
|
|
245
282
|
result = cls.validate(operand, param)
|
|
246
|
-
result.data = operand.data.copy()
|
|
283
|
+
result.data = operand.data.copy() if operand.data is not None else pd.DataFrame()
|
|
247
284
|
for measure_name in result.get_measures_names():
|
|
248
285
|
try:
|
|
249
286
|
if isinstance(param, DataComponent):
|
|
@@ -251,49 +288,56 @@ class Parameterized(Unary):
|
|
|
251
288
|
result.data[measure_name], param.data
|
|
252
289
|
)
|
|
253
290
|
else:
|
|
254
|
-
param_value =
|
|
291
|
+
param_value = param.value if param is not None else None
|
|
255
292
|
result.data[measure_name] = cls.apply_operation_series_scalar(
|
|
256
293
|
result.data[measure_name], param_value
|
|
257
294
|
)
|
|
258
295
|
except ValueError:
|
|
259
|
-
raise SemanticError(
|
|
260
|
-
|
|
296
|
+
raise SemanticError(
|
|
297
|
+
"2-1-15-1", op=cls.op, comp_name=measure_name, dataset_name=operand.name
|
|
298
|
+
) from None
|
|
261
299
|
result.data = result.data[result.get_components_names()]
|
|
262
300
|
return result
|
|
263
301
|
|
|
264
302
|
@classmethod
|
|
265
|
-
def component_evaluation(
|
|
303
|
+
def component_evaluation(
|
|
304
|
+
cls, operand: DataComponent, param: Optional[Union[DataComponent, Scalar]] = None
|
|
305
|
+
) -> DataComponent:
|
|
266
306
|
result = cls.validate(operand, param)
|
|
307
|
+
if operand.data is None:
|
|
308
|
+
operand.data = pd.Series()
|
|
267
309
|
result.data = operand.data.copy()
|
|
268
310
|
if isinstance(param, DataComponent):
|
|
269
311
|
result.data = cls.apply_operation_two_series(operand.data, param.data)
|
|
270
312
|
else:
|
|
271
|
-
param_value =
|
|
313
|
+
param_value = param.value if param is not None else None
|
|
272
314
|
result.data = cls.apply_operation_series_scalar(operand.data, param_value)
|
|
273
315
|
return result
|
|
274
316
|
|
|
275
317
|
@classmethod
|
|
276
|
-
def scalar_evaluation(cls, operand: Scalar, param: Scalar
|
|
318
|
+
def scalar_evaluation(cls, operand: Scalar, param: Optional[Any] = None) -> Scalar:
|
|
277
319
|
result = cls.validate(operand, param)
|
|
278
|
-
param_value =
|
|
320
|
+
param_value = param.value if param is not None else None
|
|
279
321
|
result.value = cls.op_func(operand.value, param_value)
|
|
280
322
|
return result
|
|
281
323
|
|
|
282
324
|
@classmethod
|
|
283
|
-
def evaluate(
|
|
284
|
-
|
|
325
|
+
def evaluate(
|
|
326
|
+
cls, operand: ALL_MODEL_DATA_TYPES, param: Optional[Union[DataComponent, Scalar]] = None
|
|
327
|
+
) -> Union[DataComponent, Dataset, Scalar]:
|
|
285
328
|
if isinstance(operand, Dataset):
|
|
286
329
|
return cls.dataset_evaluation(operand, param)
|
|
287
|
-
|
|
330
|
+
elif isinstance(operand, DataComponent):
|
|
288
331
|
return cls.component_evaluation(operand, param)
|
|
289
|
-
|
|
332
|
+
else:
|
|
290
333
|
return cls.scalar_evaluation(operand, param)
|
|
291
334
|
|
|
292
335
|
|
|
293
336
|
class Round(Parameterized):
|
|
294
337
|
"""
|
|
295
|
-
`Round <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=106&zoom=100,72,94>`_ operator
|
|
338
|
+
`Round <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=106&zoom=100,72,94>`_ operator # noqa E501
|
|
296
339
|
"""
|
|
340
|
+
|
|
297
341
|
op = ROUND
|
|
298
342
|
return_type = Integer
|
|
299
343
|
|
|
@@ -301,7 +345,7 @@ class Round(Parameterized):
|
|
|
301
345
|
def py_op(cls, x: Any, param: Any) -> Any:
|
|
302
346
|
multiplier = 1.0
|
|
303
347
|
if not pd.isnull(param):
|
|
304
|
-
multiplier = 10
|
|
348
|
+
multiplier = 10**param
|
|
305
349
|
|
|
306
350
|
if x >= 0.0:
|
|
307
351
|
rounded_value = math.floor(x * multiplier + 0.5) / multiplier
|
|
@@ -316,15 +360,16 @@ class Round(Parameterized):
|
|
|
316
360
|
|
|
317
361
|
class Trunc(Parameterized):
|
|
318
362
|
"""
|
|
319
|
-
`Trunc <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=108&zoom=100,72,94>`_ operator.
|
|
363
|
+
`Trunc <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=108&zoom=100,72,94>`_ operator. # noqa E501
|
|
320
364
|
"""
|
|
365
|
+
|
|
321
366
|
op = TRUNC
|
|
322
367
|
|
|
323
368
|
@classmethod
|
|
324
369
|
def py_op(cls, x: float, param: Optional[float]) -> Any:
|
|
325
370
|
multiplier = 1.0
|
|
326
371
|
if not pd.isnull(param):
|
|
327
|
-
multiplier = 10
|
|
372
|
+
multiplier = 10**param
|
|
328
373
|
|
|
329
374
|
truncated_value = int(x * multiplier) / multiplier
|
|
330
375
|
|
|
@@ -1,43 +1,42 @@
|
|
|
1
|
-
import os
|
|
2
1
|
from copy import copy
|
|
2
|
+
from typing import Any, Union
|
|
3
3
|
|
|
4
4
|
from vtlengine.Exceptions import SemanticError
|
|
5
5
|
|
|
6
|
-
if os.environ.get("SPARK", False):
|
|
7
|
-
|
|
8
|
-
else:
|
|
9
|
-
|
|
6
|
+
# if os.environ.get("SPARK", False):
|
|
7
|
+
# import pyspark.pandas as pd
|
|
8
|
+
# else:
|
|
9
|
+
# import pandas as pd
|
|
10
|
+
import pandas as pd
|
|
10
11
|
|
|
11
12
|
from vtlengine.Model import DataComponent, Role, Scalar
|
|
12
13
|
from vtlengine.Operators import Unary
|
|
13
14
|
|
|
14
|
-
ALLOWED_MODEL_TYPES = [DataComponent, Scalar]
|
|
15
|
+
ALLOWED_MODEL_TYPES = Union[DataComponent, Scalar]
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
class RoleSetter(Unary):
|
|
18
|
-
role
|
|
19
|
+
role: Role
|
|
19
20
|
|
|
20
21
|
@classmethod
|
|
21
|
-
def validate(cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0):
|
|
22
|
+
def validate(cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0) -> DataComponent:
|
|
22
23
|
if isinstance(operand, Scalar):
|
|
23
|
-
|
|
24
24
|
nullable = True
|
|
25
25
|
if cls.role == Role.IDENTIFIER or operand.value is not None:
|
|
26
26
|
nullable = False
|
|
27
|
-
|
|
28
27
|
return DataComponent(
|
|
29
28
|
name=operand.name,
|
|
30
29
|
data_type=operand.data_type,
|
|
31
30
|
role=cls.role,
|
|
32
31
|
nullable=nullable,
|
|
33
|
-
data=None
|
|
32
|
+
data=None,
|
|
34
33
|
)
|
|
35
34
|
operand.role = cls.role
|
|
36
35
|
return copy(operand)
|
|
37
36
|
|
|
38
37
|
@classmethod
|
|
39
|
-
def evaluate(cls, operand:
|
|
40
|
-
if isinstance(operand, DataComponent):
|
|
38
|
+
def evaluate(cls, operand: Any, data_size: int = 0) -> DataComponent:
|
|
39
|
+
if isinstance(operand, DataComponent) and operand.data is not None:
|
|
41
40
|
if not operand.nullable and any(operand.data.isnull()):
|
|
42
41
|
raise SemanticError("1-1-1-16")
|
|
43
42
|
result = cls.validate(operand, data_size)
|
|
@@ -52,14 +51,16 @@ class Identifier(RoleSetter):
|
|
|
52
51
|
role = Role.IDENTIFIER
|
|
53
52
|
|
|
54
53
|
@classmethod
|
|
55
|
-
def validate(cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0):
|
|
54
|
+
def validate(cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0) -> DataComponent:
|
|
56
55
|
result = super().validate(operand)
|
|
57
56
|
if result.nullable:
|
|
58
57
|
raise SemanticError("1-1-1-16")
|
|
59
58
|
return result
|
|
60
59
|
|
|
61
60
|
@classmethod
|
|
62
|
-
def evaluate(
|
|
61
|
+
def evaluate( # type: ignore[override]
|
|
62
|
+
cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0
|
|
63
|
+
) -> DataComponent:
|
|
63
64
|
if isinstance(operand, Scalar):
|
|
64
65
|
if operand.value is None:
|
|
65
66
|
raise SemanticError("1-1-1-16")
|
vtlengine/Operators/Set.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
import
|
|
2
|
-
from typing import List
|
|
1
|
+
from typing import List, Any, Dict
|
|
3
2
|
|
|
4
3
|
from vtlengine.Exceptions import SemanticError
|
|
5
4
|
|
|
6
|
-
if os.environ.get("SPARK"):
|
|
7
|
-
|
|
8
|
-
else:
|
|
9
|
-
|
|
5
|
+
# if os.environ.get("SPARK"):
|
|
6
|
+
# import pyspark.pandas as pd
|
|
7
|
+
# else:
|
|
8
|
+
# import pandas as pd
|
|
9
|
+
import pandas as pd
|
|
10
10
|
|
|
11
11
|
from vtlengine.Model import Dataset
|
|
12
12
|
from vtlengine.Operators import Operator
|
|
@@ -18,18 +18,22 @@ class Set(Operator):
|
|
|
18
18
|
@classmethod
|
|
19
19
|
def check_same_structure(cls, dataset_1: Dataset, dataset_2: Dataset) -> None:
|
|
20
20
|
if len(dataset_1.components) != len(dataset_2.components):
|
|
21
|
-
raise SemanticError(
|
|
22
|
-
|
|
21
|
+
raise SemanticError(
|
|
22
|
+
"1-1-17-1", op=cls.op, dataset_1=dataset_1.name, dataset_2=dataset_2.name
|
|
23
|
+
)
|
|
23
24
|
|
|
24
25
|
for comp in dataset_1.components.values():
|
|
25
26
|
if comp.name not in dataset_2.components:
|
|
26
27
|
raise Exception(f"Component {comp.name} not found in dataset {dataset_2.name}")
|
|
27
28
|
second_comp = dataset_2.components[comp.name]
|
|
28
|
-
binary_implicit_promotion(
|
|
29
|
-
|
|
29
|
+
binary_implicit_promotion(
|
|
30
|
+
comp.data_type, second_comp.data_type, cls.type_to_check, cls.return_type
|
|
31
|
+
)
|
|
30
32
|
if comp.role != second_comp.role:
|
|
31
|
-
raise Exception(
|
|
32
|
-
|
|
33
|
+
raise Exception(
|
|
34
|
+
f"Component {comp.name} has different roles "
|
|
35
|
+
f"in datasets {dataset_1.name} and {dataset_2.name}"
|
|
36
|
+
)
|
|
33
37
|
|
|
34
38
|
@classmethod
|
|
35
39
|
def validate(cls, operands: List[Dataset]) -> Dataset:
|
|
@@ -38,7 +42,7 @@ class Set(Operator):
|
|
|
38
42
|
for operand in operands[1:]:
|
|
39
43
|
cls.check_same_structure(base_operand, operand)
|
|
40
44
|
|
|
41
|
-
result_components = {}
|
|
45
|
+
result_components: Dict[str, Any] = {}
|
|
42
46
|
for operand in operands:
|
|
43
47
|
if len(result_components) == 0:
|
|
44
48
|
result_components = operand.components
|
|
@@ -46,7 +50,8 @@ class Set(Operator):
|
|
|
46
50
|
for comp_name, comp in operand.components.items():
|
|
47
51
|
current_comp = result_components[comp_name]
|
|
48
52
|
result_components[comp_name].data_type = binary_implicit_promotion(
|
|
49
|
-
current_comp.data_type, comp.data_type
|
|
53
|
+
current_comp.data_type, comp.data_type
|
|
54
|
+
)
|
|
50
55
|
result_components[comp_name].nullable = current_comp.nullable or comp.nullable
|
|
51
56
|
|
|
52
57
|
result = Dataset(name="result", components=result_components, data=None)
|
|
@@ -58,10 +63,9 @@ class Union(Set):
|
|
|
58
63
|
def evaluate(cls, operands: List[Dataset]) -> Dataset:
|
|
59
64
|
result = cls.validate(operands)
|
|
60
65
|
all_datapoints = [ds.data for ds in operands]
|
|
61
|
-
result.data = pd.concat(all_datapoints, sort=True,
|
|
62
|
-
ignore_index=True)
|
|
66
|
+
result.data = pd.concat(all_datapoints, sort=True, ignore_index=True)
|
|
63
67
|
identifiers_names = result.get_identifiers_names()
|
|
64
|
-
result.data = result.data.drop_duplicates(subset=identifiers_names, keep=
|
|
68
|
+
result.data = result.data.drop_duplicates(subset=identifiers_names, keep="first")
|
|
65
69
|
result.data.reset_index(drop=True, inplace=True)
|
|
66
70
|
return result
|
|
67
71
|
|
|
@@ -76,16 +80,22 @@ class Intersection(Set):
|
|
|
76
80
|
if result.data is None:
|
|
77
81
|
result.data = data
|
|
78
82
|
else:
|
|
79
|
-
|
|
80
|
-
|
|
83
|
+
if data is None:
|
|
84
|
+
result.data = pd.DataFrame(columns=result.get_identifiers_names())
|
|
85
|
+
break
|
|
86
|
+
result.data = result.data.merge(
|
|
87
|
+
data, how="inner", on=result.get_identifiers_names()
|
|
88
|
+
)
|
|
81
89
|
|
|
82
|
-
not_identifiers = [
|
|
83
|
-
|
|
90
|
+
not_identifiers = [
|
|
91
|
+
col for col in result.get_measures_names() + result.get_attributes_names()
|
|
92
|
+
]
|
|
84
93
|
|
|
85
94
|
for col in not_identifiers:
|
|
86
95
|
result.data[col] = result.data[col + "_x"]
|
|
87
96
|
result.data = result.data[result.get_identifiers_names() + not_identifiers]
|
|
88
|
-
result.data
|
|
97
|
+
if result.data is not None:
|
|
98
|
+
result.data.reset_index(drop=True, inplace=True)
|
|
89
99
|
return result
|
|
90
100
|
|
|
91
101
|
|
|
@@ -96,35 +106,46 @@ class Symdiff(Set):
|
|
|
96
106
|
result = cls.validate(operands)
|
|
97
107
|
all_datapoints = [ds.data for ds in operands]
|
|
98
108
|
for data in all_datapoints:
|
|
109
|
+
if data is None:
|
|
110
|
+
data = pd.DataFrame(columns=result.get_identifiers_names())
|
|
99
111
|
if result.data is None:
|
|
100
112
|
result.data = data
|
|
101
113
|
else:
|
|
102
114
|
# Realiza la operación equivalente en pyspark.pandas
|
|
103
|
-
result.data = result.data.merge(
|
|
104
|
-
|
|
105
|
-
|
|
115
|
+
result.data = result.data.merge(
|
|
116
|
+
data, how="outer", on=result.get_identifiers_names(), suffixes=("_x", "_y")
|
|
117
|
+
)
|
|
106
118
|
|
|
107
119
|
for measure in result.get_measures_names():
|
|
108
|
-
result.data[
|
|
109
|
-
lambda row:
|
|
110
|
-
|
|
111
|
-
|
|
120
|
+
result.data["_merge"] = result.data.apply(
|
|
121
|
+
lambda row: (
|
|
122
|
+
"left_only"
|
|
123
|
+
if pd.isnull(row[measure + "_y"])
|
|
124
|
+
else ("right_only" if pd.isnull(row[measure + "_x"]) else "both")
|
|
125
|
+
),
|
|
126
|
+
axis=1,
|
|
112
127
|
)
|
|
113
128
|
|
|
114
129
|
not_identifiers = result.get_measures_names() + result.get_attributes_names()
|
|
115
130
|
for col in not_identifiers:
|
|
116
131
|
result.data[col] = result.data.apply(
|
|
117
|
-
lambda x, c=col:
|
|
118
|
-
x[c +
|
|
132
|
+
lambda x, c=col: (
|
|
133
|
+
x[c + "_x"]
|
|
134
|
+
if x["_merge"] == "left_only"
|
|
135
|
+
else (x[c + "_y"] if x["_merge"] == "right_only" else None)
|
|
136
|
+
),
|
|
137
|
+
axis=1,
|
|
138
|
+
)
|
|
119
139
|
result.data = result.data[result.get_identifiers_names() + not_identifiers].dropna()
|
|
120
|
-
result.data
|
|
140
|
+
if result.data is not None:
|
|
141
|
+
result.data = result.data.reset_index(drop=True)
|
|
121
142
|
return result
|
|
122
143
|
|
|
123
144
|
|
|
124
145
|
class Setdiff(Set):
|
|
125
146
|
|
|
126
147
|
@staticmethod
|
|
127
|
-
def has_null(row):
|
|
148
|
+
def has_null(row: Any) -> bool:
|
|
128
149
|
return row.isnull().any()
|
|
129
150
|
|
|
130
151
|
@classmethod
|
|
@@ -135,12 +156,15 @@ class Setdiff(Set):
|
|
|
135
156
|
if result.data is None:
|
|
136
157
|
result.data = data
|
|
137
158
|
else:
|
|
159
|
+
if data is None:
|
|
160
|
+
data = pd.DataFrame(columns=result.get_identifiers_names())
|
|
138
161
|
result.data = result.data.merge(data, how="left", on=result.get_identifiers_names())
|
|
139
162
|
if len(result.data) > 0:
|
|
140
163
|
result.data = result.data[result.data.apply(cls.has_null, axis=1)]
|
|
141
164
|
|
|
142
|
-
not_identifiers = [
|
|
143
|
-
|
|
165
|
+
not_identifiers = [
|
|
166
|
+
col for col in result.get_measures_names() + result.get_attributes_names()
|
|
167
|
+
]
|
|
144
168
|
for col in not_identifiers:
|
|
145
169
|
if col + "_x" in result.data:
|
|
146
170
|
result.data[col] = result.data[col + "_x"]
|
|
@@ -148,5 +172,6 @@ class Setdiff(Set):
|
|
|
148
172
|
if col + "_y" in result.data:
|
|
149
173
|
del result.data[col + "_y"]
|
|
150
174
|
result.data = result.data[result.get_identifiers_names() + not_identifiers]
|
|
151
|
-
result.data
|
|
175
|
+
if result.data is not None:
|
|
176
|
+
result.data.reset_index(drop=True, inplace=True)
|
|
152
177
|
return result
|