vtlengine 1.4.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vtlengine/API/_InternalApi.py +791 -0
- vtlengine/API/__init__.py +612 -0
- vtlengine/API/data/schema/external_routines_schema.json +34 -0
- vtlengine/API/data/schema/json_schema_2.1.json +116 -0
- vtlengine/API/data/schema/value_domain_schema.json +97 -0
- vtlengine/AST/ASTComment.py +57 -0
- vtlengine/AST/ASTConstructor.py +598 -0
- vtlengine/AST/ASTConstructorModules/Expr.py +1928 -0
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +995 -0
- vtlengine/AST/ASTConstructorModules/Terminals.py +790 -0
- vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- vtlengine/AST/ASTDataExchange.py +10 -0
- vtlengine/AST/ASTEncoders.py +32 -0
- vtlengine/AST/ASTString.py +675 -0
- vtlengine/AST/ASTTemplate.py +558 -0
- vtlengine/AST/ASTVisitor.py +25 -0
- vtlengine/AST/DAG/__init__.py +479 -0
- vtlengine/AST/DAG/_words.py +10 -0
- vtlengine/AST/Grammar/Vtl.g4 +705 -0
- vtlengine/AST/Grammar/VtlTokens.g4 +409 -0
- vtlengine/AST/Grammar/__init__.py +0 -0
- vtlengine/AST/Grammar/lexer.py +2139 -0
- vtlengine/AST/Grammar/parser.py +16597 -0
- vtlengine/AST/Grammar/tokens.py +169 -0
- vtlengine/AST/VtlVisitor.py +824 -0
- vtlengine/AST/__init__.py +674 -0
- vtlengine/DataTypes/TimeHandling.py +562 -0
- vtlengine/DataTypes/__init__.py +863 -0
- vtlengine/DataTypes/_time_checking.py +135 -0
- vtlengine/Exceptions/__exception_file_generator.py +96 -0
- vtlengine/Exceptions/__init__.py +159 -0
- vtlengine/Exceptions/messages.py +1004 -0
- vtlengine/Interpreter/__init__.py +2048 -0
- vtlengine/Model/__init__.py +501 -0
- vtlengine/Operators/Aggregation.py +357 -0
- vtlengine/Operators/Analytic.py +455 -0
- vtlengine/Operators/Assignment.py +23 -0
- vtlengine/Operators/Boolean.py +106 -0
- vtlengine/Operators/CastOperator.py +451 -0
- vtlengine/Operators/Clause.py +366 -0
- vtlengine/Operators/Comparison.py +488 -0
- vtlengine/Operators/Conditional.py +495 -0
- vtlengine/Operators/General.py +191 -0
- vtlengine/Operators/HROperators.py +254 -0
- vtlengine/Operators/Join.py +447 -0
- vtlengine/Operators/Numeric.py +422 -0
- vtlengine/Operators/RoleSetter.py +77 -0
- vtlengine/Operators/Set.py +176 -0
- vtlengine/Operators/String.py +578 -0
- vtlengine/Operators/Time.py +1144 -0
- vtlengine/Operators/Validation.py +275 -0
- vtlengine/Operators/__init__.py +900 -0
- vtlengine/Utils/__Virtual_Assets.py +34 -0
- vtlengine/Utils/__init__.py +479 -0
- vtlengine/__extras_check.py +17 -0
- vtlengine/__init__.py +27 -0
- vtlengine/files/__init__.py +0 -0
- vtlengine/files/output/__init__.py +35 -0
- vtlengine/files/output/_time_period_representation.py +55 -0
- vtlengine/files/parser/__init__.py +240 -0
- vtlengine/files/parser/_rfc_dialect.py +22 -0
- vtlengine/py.typed +0 -0
- vtlengine-1.4.0rc2.dist-info/METADATA +89 -0
- vtlengine-1.4.0rc2.dist-info/RECORD +66 -0
- vtlengine-1.4.0rc2.dist-info/WHEEL +4 -0
- vtlengine-1.4.0rc2.dist-info/licenses/LICENSE.md +661 -0
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
import _random
|
|
2
|
+
import math
|
|
3
|
+
import operator
|
|
4
|
+
import warnings
|
|
5
|
+
from decimal import Decimal, getcontext
|
|
6
|
+
from typing import Any, Optional, Union
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
import vtlengine.Operators as Operator
|
|
11
|
+
from vtlengine.AST.Grammar.tokens import (
|
|
12
|
+
ABS,
|
|
13
|
+
CEIL,
|
|
14
|
+
DIV,
|
|
15
|
+
EXP,
|
|
16
|
+
FLOOR,
|
|
17
|
+
LN,
|
|
18
|
+
LOG,
|
|
19
|
+
MINUS,
|
|
20
|
+
MOD,
|
|
21
|
+
MULT,
|
|
22
|
+
PLUS,
|
|
23
|
+
POWER,
|
|
24
|
+
RANDOM,
|
|
25
|
+
ROUND,
|
|
26
|
+
SQRT,
|
|
27
|
+
TRUNC,
|
|
28
|
+
)
|
|
29
|
+
from vtlengine.DataTypes import Integer, Number, binary_implicit_promotion
|
|
30
|
+
from vtlengine.Exceptions import SemanticError
|
|
31
|
+
from vtlengine.Model import DataComponent, Dataset, Scalar
|
|
32
|
+
from vtlengine.Operators import ALL_MODEL_DATA_TYPES
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class Unary(Operator.Unary):
    """
    Base class for the numeric unary operators.

    Restricts the operand's type check to Number; concrete operators supply
    ``op`` (the grammar token) and ``py_op`` (the per-value callable).
    """

    type_to_check = Number
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class Binary(Operator.Binary):
    """
    Base class for the numeric binary operators.

    Checks that the binary operation is performed with numbers and provides
    the common scalar-level operation function used by the subclasses.
    """

    type_to_check = Number

    @classmethod
    def op_func(cls, x: Any, y: Any) -> Any:
        """Apply ``cls.py_op`` to two scalar values with null propagation.

        A null on either side yields a null result. RANDOM receives its
        (seed, index) arguments untouched; every other operator is computed
        with Decimal arithmetic at a fixed precision to avoid floating point
        errors, and integer-valued results are returned as int.

        Raises SemanticError("2-1-15-6") on division by zero.
        """
        if pd.isnull(x) or pd.isnull(y):
            return None
        # RANDOM takes (seed, index) and must not be coerced to float/Decimal.
        # Checked before any conversion so non-integer seeds also work; the
        # previous version only short-circuited when both arguments were int,
        # sending float seeds into the Decimal path (and `range(Decimal)`).
        if cls.op == RANDOM:
            return cls.py_op(x, y)
        x = float(x)
        y = float(y)
        # Single division-by-zero check on the float path; it also covers the
        # int/int case, so the former duplicate int-only check was dropped.
        if cls.op == DIV and y == 0:
            raise SemanticError("2-1-15-6", op=cls.op, value=y)

        # Handles precision to avoid floating point errors. The precision must
        # be set *before* operating: previously it was set afterwards, so the
        # first call in a process ran at the default precision (28) while all
        # later calls ran at 10.
        getcontext().prec = 10
        decimal_value = cls.py_op(Decimal(x), Decimal(y))
        result = float(decimal_value)
        if result.is_integer():
            return int(result)
        return result
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class UnPlus(Unary):
    """
    Unary plus: returns its numeric operand unchanged.

    `Plus <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=94&zoom=100,72,142>`_ unary operator
    """  # noqa E501

    op = PLUS
    py_op = operator.pos

    @classmethod
    def apply_operation_component(cls, series: Any) -> Any:
        # Identity operation: no per-element work is required.
        return series
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class UnMinus(Unary):
    """
    Unary minus: inverts the sign of the operand.

    `Minus <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=95&zoom=100,72,414>`_ unary operator
    """  # noqa E501

    op = MINUS
    py_op = operator.neg
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class AbsoluteValue(Unary):
    """
    Absolute value of the operand.

    `Absolute <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=112&zoom=100,72,801>`_ unary operator
    """  # noqa E501

    op = ABS
    py_op = operator.abs
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class Exponential(Unary):
    """
    Returns e raised to the operand.

    `Exponential <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=114&zoom=100,72,94>`_ unary operator
    """  # noqa E501

    op = EXP
    py_op = math.exp
    # The result is always a Number, even for Integer operands.
    return_type = Number
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class NaturalLogarithm(Unary):
    """
    Natural (base-e) logarithm of the operand.

    `Natural logarithm <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=115&zoom=100,72,394>`_
    unary operator
    """  # noqa E501

    op = LN
    py_op = math.log
    # The result is always a Number, even for Integer operands.
    return_type = Number
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class SquareRoot(Unary):
    """
    Square root of the operand.

    `Square Root <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=119&zoom=100,72,556>`_
    unary operator
    """  # noqa E501

    op = SQRT
    py_op = math.sqrt
    # The result is always a Number, even for Integer operands.
    return_type = Number
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class Ceil(Unary):
    """
    Smallest integer greater than or equal to the operand.

    `Ceiling <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=110&zoom=100,72,94>`_ unary operator
    """  # noqa E501

    op = CEIL
    py_op = math.ceil
    # math.ceil always yields an int.
    return_type = Integer
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class Floor(Unary):
    """
    Greatest integer less than or equal to the operand.

    `Floor <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=111&zoom=100,72,442>`_ unary operator
    """  # noqa E501

    op = FLOOR
    py_op = math.floor
    # math.floor always yields an int.
    return_type = Integer
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class BinPlus(Binary):
    """
    Numeric addition.

    `Addition <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=96&zoom=100,72,692>`_ binary operator
    """  # noqa E501

    op = PLUS
    py_op = operator.add
    # Redundant with Binary.type_to_check, kept for explicitness.
    type_to_check = Number
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class BinMinus(Binary):
    """
    Numeric subtraction.

    `Subtraction <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=98&zoom=100,72,448>`_ binary operator
    """  # noqa E501

    op = MINUS
    py_op = operator.sub
    # Redundant with Binary.type_to_check, kept for explicitness.
    type_to_check = Number
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class Mult(Binary):
    """
    Numeric multiplication.

    `Multiplication <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=100&zoom=100,72,254>`_
    binary operator
    """  # noqa E501

    op = MULT
    py_op = operator.mul
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class Div(Binary):
    """
    Numeric division. Division by zero raises a SemanticError in Binary.op_func.

    `Division <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=102&zoom=100,72,94>`_
    binary operator
    """  # noqa E501

    op = DIV
    py_op = operator.truediv
    # Division always yields a Number, even for Integer operands.
    return_type = Number
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class Logarithm(Binary):
    """
    Logarithm of the operand in the base given by the parameter.

    `Logarithm <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=118&zoom=100,72,228>`_ operator
    """  # noqa E501

    op = LOG
    return_type = Number

    @classmethod
    def py_op(cls, x: Any, param: Any) -> Any:
        """Return the logarithm of ``x`` in base ``param``.

        A null base yields a null result; a non-positive base raises
        SemanticError("2-1-15-3").
        """
        if pd.isnull(param):
            return None
        if param > 0:
            return math.log(x, param)
        raise SemanticError("2-1-15-3", op=cls.op, value=param)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
class Modulo(Binary):
    """
    Remainder of the division of the operands.

    `Modulo <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=104&zoom=100,72,94>`_ operator
    """  # noqa E501

    op = MOD
    py_op = operator.mod
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class Power(Binary):
    """
    Raises the operand to the exponent given by the parameter.

    `Power <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=116&zoom=100,72,693>`_ operator
    """  # noqa E501

    op = POWER
    return_type = Number

    @classmethod
    def py_op(cls, x: Any, param: Any) -> Any:
        # A null exponent propagates as a null result.
        return None if pd.isnull(param) else x**param
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
class Parameterized(Unary):
    """Unary operator with an optional parameter (e.g. the digits of round/trunc).

    Validates the operand like Unary and additionally validates the parameter:
    it may be a Scalar, or a DataComponent (only when the operand itself is not
    a Scalar), never a Dataset. The return type is Integer when no parameter is
    given and Number otherwise.
    """

    @classmethod
    def validate(
        cls,
        operand: Operator.ALL_MODEL_DATA_TYPES,
        param: Optional[Union[DataComponent, Scalar]] = None,
    ) -> Any:
        """Validate operand and parameter, set cls.return_type, delegate to Unary."""
        if param is not None:
            # A Dataset can never act as the parameter.
            if isinstance(param, Dataset):
                raise SemanticError("1-1-15-8", op=cls.op, comp_type="Dataset")
            if isinstance(param, DataComponent):
                # A component-valued parameter requires a non-scalar operand.
                if isinstance(operand, Scalar):
                    raise SemanticError(
                        "1-1-15-8",
                        op=cls.op,
                        comp_type="DataComponent and an Scalar operand",
                    )
                cls.validate_type_compatibility(param.data_type)
            else:
                cls.validate_scalar_type(param)
        # NOTE(review): return_type is mutated on the class itself, so
        # interleaved validations of the same operator class would interfere —
        # confirm the interpreter evaluates sequentially.
        if param is None:
            cls.return_type = Integer
        else:
            cls.return_type = Number

        return super().validate(operand)

    @classmethod
    def op_func(cls, x: Any, param: Optional[Any]) -> Any:
        # A null operand propagates as null; null-param handling is left to py_op.
        return None if pd.isnull(x) else cls.py_op(x, param)

    @classmethod
    def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
        # Element-wise combination of the operand series with a parameter series.
        return left_series.combine(right_series, cls.op_func)

    @classmethod
    def apply_operation_series_scalar(cls, series: Any, param: Any) -> Any:
        # Apply the operation over a series with a constant parameter value.
        return series.map(lambda x: cls.op_func(x, param))

    @classmethod
    def dataset_evaluation(
        cls, operand: Dataset, param: Optional[Union[DataComponent, Scalar]] = None
    ) -> Dataset:
        """Evaluate the operator over every measure of a Dataset operand."""
        result = cls.validate(operand, param)
        result.data = operand.data.copy() if operand.data is not None else pd.DataFrame()
        for measure_name in result.get_measures_names():
            try:
                if isinstance(param, DataComponent):
                    result.data[measure_name] = cls.apply_operation_two_series(
                        result.data[measure_name], param.data
                    )
                else:
                    param_value = param.value if param is not None else None
                    result.data[measure_name] = cls.apply_operation_series_scalar(
                        result.data[measure_name], param_value
                    )
            except ValueError:
                # Surface pandas conversion errors as a semantic error on the measure.
                raise SemanticError(
                    "2-1-15-1",
                    op=cls.op,
                    comp_name=measure_name,
                    dataset_name=operand.name,
                ) from None
        # Keep only (and reorder to) the declared components.
        result.data = result.data[result.get_components_names()]
        return result

    @classmethod
    def component_evaluation(
        cls,
        operand: DataComponent,
        param: Optional[Union[DataComponent, Scalar]] = None,
    ) -> DataComponent:
        """Evaluate the operator over a DataComponent operand."""
        result = cls.validate(operand, param)
        # NOTE(review): mutates the *input* operand when it carries no data —
        # confirm this side effect is intended.
        if operand.data is None:
            operand.data = pd.Series()
        result.data = operand.data.copy()
        if isinstance(param, DataComponent):
            result.data = cls.apply_operation_two_series(operand.data, param.data)
        else:
            param_value = param.value if param is not None else None
            result.data = cls.apply_operation_series_scalar(operand.data, param_value)
        return result

    @classmethod
    def scalar_evaluation(cls, operand: Scalar, param: Optional[Any] = None) -> Scalar:
        """Evaluate the operator over a Scalar operand."""
        result = cls.validate(operand, param)
        param_value = param.value if param is not None else None
        result.value = cls.op_func(operand.value, param_value)
        return result

    @classmethod
    def evaluate(
        cls,
        operand: ALL_MODEL_DATA_TYPES,
        param: Optional[Union[DataComponent, Scalar]] = None,
    ) -> Union[DataComponent, Dataset, Scalar]:
        """Dispatch evaluation according to the operand's model type."""
        if isinstance(operand, Dataset):
            return cls.dataset_evaluation(operand, param)
        elif isinstance(operand, DataComponent):
            return cls.component_evaluation(operand, param)
        else:
            return cls.scalar_evaluation(operand, param)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
class Round(Parameterized):
    """
    `Round <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=106&zoom=100,72,94>`_ operator

    Rounds to ``param`` decimal digits, half away from zero. Without a digit
    count the result is an Integer.
    """  # noqa E501

    op = ROUND
    return_type = Integer

    @classmethod
    def py_op(cls, x: Any, param: Any) -> Any:
        """Round ``x`` to ``param`` digits, half away from zero."""
        multiplier = 1.0
        if not pd.isnull(param):
            multiplier = 10**param

        # Half-away-from-zero rounding (VTL semantics) instead of the
        # banker's rounding performed by Python's built-in round().
        if x >= 0.0:
            rounded_value = math.floor(x * multiplier + 0.5) / multiplier
        else:
            rounded_value = math.ceil(x * multiplier - 0.5) / multiplier

        # A null param (None *or* NaN) means "no digits": return an Integer.
        # Previously only `param is not None` was tested here, so a NaN param
        # returned a float — inconsistent with Trunc, which keys on pd.isnull.
        if not pd.isnull(param):
            return rounded_value

        return int(rounded_value)
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
class Trunc(Parameterized):
    """
    `Trunc <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=108&zoom=100,72,94>`_ operator.

    Truncates (towards zero) to ``param`` decimal digits. Without a digit
    count the result is an Integer.
    """  # noqa E501

    op = TRUNC

    @classmethod
    def py_op(cls, x: float, param: Optional[float]) -> Any:
        """Truncate ``x`` towards zero, keeping ``param`` decimal digits."""
        multiplier = 1.0
        # pd.isnull(None) is True, so the former additional
        # `and param is not None` clause was dead code and has been dropped.
        if not pd.isnull(param):
            multiplier = 10**param

        # int() truncates towards zero for positive and negative values alike.
        truncated_value = int(x * multiplier) / multiplier

        if not pd.isnull(param):
            return truncated_value

        return int(truncated_value)
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
class PseudoRandom(_random.Random):
    """Mersenne Twister generator that is always seeded explicitly.

    Thin wrapper over ``_random.Random`` that forces a deterministic seed so
    the RANDOM operator produces reproducible streams.
    """

    def __init__(self, seed: Union[int, float]) -> None:
        super().__init__()
        self.seed(seed)
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
class Random(Parameterized):
    """RANDOM operator: deterministic pseudo-random number from a seed and an index.

    Returns the ``index``-th draw (0-based, rounded to 6 decimals) of the
    pseudo-random stream seeded with ``seed``.
    """

    op = RANDOM
    return_type = Number

    @classmethod
    def validate(cls, seed: Any, index: Any = None) -> Any:
        # NOTE(review): despite the None default, `index` must not be None —
        # index.data_type is read unconditionally below; confirm all callers
        # always pass it.
        if index.data_type != Integer:
            index.data_type = binary_implicit_promotion(index.data_type, Integer)
        if index.value < 0:
            # NOTE(review): passes the Scalar object (not index.value) as the
            # error's `value` — confirm the message renders as intended.
            raise SemanticError("2-1-15-2", op=cls.op, value=index)
        if index.value > 10000:
            # py_op advances the generator `index` times, so cost is O(index).
            warnings.warn(
                "Random: The value of 'index' is very big. This can affect performance.",
                UserWarning,
            )
        return super().validate(seed, index)

    @classmethod
    def py_op(cls, seed: Union[int, float], index: int) -> float:
        # Advance a freshly-seeded generator `index` times, then round the next draw.
        instance: PseudoRandom = PseudoRandom(seed)
        for _ in range(index):
            instance.random()
        return instance.random().__round__(6)
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from copy import copy
|
|
2
|
+
from typing import Any, Union
|
|
3
|
+
|
|
4
|
+
# if os.environ.get("SPARK", False):
|
|
5
|
+
# import pyspark.pandas as pd
|
|
6
|
+
# else:
|
|
7
|
+
# import pandas as pd
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
from vtlengine.Exceptions import SemanticError
|
|
11
|
+
from vtlengine.Model import DataComponent, Role, Scalar
|
|
12
|
+
from vtlengine.Operators import Unary
|
|
13
|
+
|
|
14
|
+
# Model types a role-setting operator accepts as operand.
ALLOWED_MODEL_TYPES = Union[DataComponent, Scalar]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class RoleSetter(Unary):
    """Base operator that re-assigns the role (cls.role) of a component.

    Scalar operands are converted into a DataComponent with the target role;
    DataComponent operands keep their data and get the new role.
    """

    role: Role

    @classmethod
    def validate(cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0) -> DataComponent:
        """Build the resulting DataComponent for the role change."""
        if isinstance(operand, Scalar):
            # Identifiers are never nullable, nor is a scalar that holds a value.
            nullable = not (cls.role == Role.IDENTIFIER or operand.value is not None)
            return DataComponent(
                name=operand.name,
                data_type=operand.data_type,
                role=cls.role,
                nullable=nullable,
                data=None,
            )
        # DataComponent operand: set the role in place, return a shallow copy.
        operand.role = cls.role
        return copy(operand)

    @classmethod
    def evaluate(cls, operand: Any, data_size: int = 0) -> DataComponent:
        """Validate the role change and attach the resulting data."""
        # A non-nullable component must not actually contain nulls.
        if (
            isinstance(operand, DataComponent)
            and operand.data is not None
            and not operand.nullable
            and any(operand.data.isnull())
        ):
            raise SemanticError("1-1-1-16")
        result = cls.validate(operand, data_size)
        if isinstance(operand, Scalar):
            # Broadcast the scalar value to the dataset length.
            result.data = pd.Series([operand.value] * data_size, dtype=object)
        else:
            result.data = operand.data
        return result
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class Identifier(RoleSetter):
    """Assigns the IDENTIFIER role; identifiers can never contain nulls."""

    role = Role.IDENTIFIER

    @classmethod
    def validate(cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0) -> DataComponent:
        result = super().validate(operand)
        # An identifier component must not be nullable.
        if result.nullable:
            raise SemanticError("1-1-1-16")
        return result

    @classmethod
    def evaluate(  # type: ignore[override]
        cls, operand: ALLOWED_MODEL_TYPES, data_size: int = 0
    ) -> DataComponent:
        # A null scalar value can never become an identifier.
        if isinstance(operand, Scalar) and operand.value is None:
            raise SemanticError("1-1-1-16")
        return super().evaluate(operand, data_size)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class Attribute(RoleSetter):
    """Assigns the ATTRIBUTE role to the operand component."""

    role = Role.ATTRIBUTE
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class Measure(RoleSetter):
    """Assigns the MEASURE role to the operand component."""

    role = Role.MEASURE
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
from typing import Any, Dict, List
|
|
2
|
+
|
|
3
|
+
# if os.environ.get("SPARK"):
|
|
4
|
+
# import pyspark.pandas as pd
|
|
5
|
+
# else:
|
|
6
|
+
# import pandas as pd
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
from vtlengine.DataTypes import binary_implicit_promotion
|
|
10
|
+
from vtlengine.Exceptions import SemanticError
|
|
11
|
+
from vtlengine.Model import Dataset
|
|
12
|
+
from vtlengine.Operators import Operator
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Set(Operator):
    """Base class for the VTL set operators (union, intersection, symdiff, setdiff).

    Provides the structural-compatibility check between operand datasets and
    the construction of the result dataset's components.
    """

    @classmethod
    def check_same_structure(cls, dataset_1: Dataset, dataset_2: Dataset) -> None:
        """Raise if the two datasets do not share a compatible structure."""
        if len(dataset_1.components) != len(dataset_2.components):
            raise SemanticError(
                "1-1-17-1",
                op=cls.op,
                dataset_1=dataset_1.name,
                dataset_2=dataset_2.name,
            )

        for comp in dataset_1.components.values():
            if comp.name not in dataset_2.components:
                # NOTE(review): generic Exception, unlike the SemanticError
                # above — confirm whether a coded SemanticError was intended.
                raise Exception(f"Component {comp.name} not found in dataset {dataset_2.name}")
            second_comp = dataset_2.components[comp.name]
            # Types must be implicitly promotable to a common type; raises otherwise.
            binary_implicit_promotion(
                comp.data_type,
                second_comp.data_type,
                cls.type_to_check,
                cls.return_type,
            )
            if comp.role != second_comp.role:
                raise Exception(
                    f"Component {comp.name} has different roles "
                    f"in datasets {dataset_1.name} and {dataset_2.name}"
                )

    @classmethod
    def validate(cls, operands: List[Dataset]) -> Dataset:
        """Check every operand against the first and build the result dataset."""
        base_operand = operands[0]
        for operand in operands[1:]:
            cls.check_same_structure(base_operand, operand)

        result_components: Dict[str, Any] = {}
        for operand in operands:
            if len(result_components) == 0:
                # NOTE(review): aliases the first operand's components dict —
                # the in-place mutations below therefore also modify that
                # operand; confirm this side effect is intended.
                result_components = operand.components
            else:
                for comp_name, comp in operand.components.items():
                    current_comp = result_components[comp_name]
                    # Promote to the common data type across all operands.
                    result_components[comp_name].data_type = binary_implicit_promotion(
                        current_comp.data_type, comp.data_type
                    )
                    # Nullable if nullable in any operand.
                    result_components[comp_name].nullable = current_comp.nullable or comp.nullable

        result = Dataset(name="result", components=result_components, data=None)
        return result
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class Union(Set):
    """VTL union: concatenates operands, keeping the first occurrence per key."""

    @classmethod
    def evaluate(cls, operands: List[Dataset]) -> Dataset:
        result = cls.validate(operands)
        # Stack all datapoints; earlier operands take precedence on duplicated
        # identifier keys, hence keep="first" below.
        stacked = pd.concat([ds.data for ds in operands], sort=True, ignore_index=True)
        deduped = stacked.drop_duplicates(subset=result.get_identifiers_names(), keep="first")
        deduped.reset_index(drop=True, inplace=True)
        result.data = deduped
        return result
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class Intersection(Set):
    """VTL intersection: rows whose identifier keys appear in every operand."""

    @classmethod
    def evaluate(cls, operands: List[Dataset]) -> Dataset:
        result = cls.validate(operands)
        all_datapoints = [ds.data for ds in operands]
        for data in all_datapoints:
            if result.data is None:
                # First operand: start from its data as-is.
                result.data = data
            else:
                if data is None:
                    # Any operand without data makes the intersection empty.
                    result.data = pd.DataFrame(columns=result.get_identifiers_names())
                    break
                # Inner join on identifiers keeps only the common keys; colliding
                # measure/attribute columns get pandas' default "_x"/"_y" suffixes.
                result.data = result.data.merge(
                    data, how="inner", on=result.get_identifiers_names()
                )

                not_identifiers = result.get_measures_names() + result.get_attributes_names()

                # Keep the left-hand ("_x") values for non-identifier columns and
                # restore the plain column names before the next merge.
                for col in not_identifiers:
                    result.data[col] = result.data[col + "_x"]
                result.data = result.data[result.get_identifiers_names() + not_identifiers]
        if result.data is not None:
            result.data.reset_index(drop=True, inplace=True)
        return result
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class Symdiff(Set):
    """VTL symmetric difference: rows present in exactly one side of each pair."""

    @classmethod
    def evaluate(cls, operands: List[Dataset]) -> Dataset:
        result = cls.validate(operands)
        all_datapoints = [ds.data for ds in operands]
        for data in all_datapoints:
            if data is None:
                # Treat a missing operand as an empty set of datapoints.
                data = pd.DataFrame(columns=result.get_identifiers_names())
            if result.data is None:
                result.data = data
            else:
                # Performs the equivalent operation in pyspark.pandas
                # Full outer join on identifiers; one-sided rows get NaN columns
                # from the missing side.
                result.data = result.data.merge(
                    data,
                    how="outer",
                    on=result.get_identifiers_names(),
                    suffixes=("_x", "_y"),
                )

                # Classify each row by which side(s) it came from.
                # NOTE(review): "_merge" is recomputed on every iteration, so
                # only the *last* measure decides the classification — confirm
                # this is intended.
                for measure in result.get_measures_names():
                    result.data["_merge"] = result.data.apply(
                        lambda row: (
                            "left_only"
                            if pd.isnull(row[f"{measure}_y"])
                            else ("right_only" if pd.isnull(row[f"{measure}_x"]) else "both")
                        ),
                        axis=1,
                    )

                not_identifiers = result.get_measures_names() + result.get_attributes_names()
                # Take the value from whichever single side the row belongs to;
                # rows present on both sides become None and are dropped below.
                for col in not_identifiers:
                    result.data[col] = result.data.apply(
                        lambda x, c=col: (
                            x[c + "_x"]
                            if x["_merge"] == "left_only"
                            else (x[c + "_y"] if x["_merge"] == "right_only" else None)
                        ),
                        axis=1,
                    )
                # NOTE(review): dropna() also removes one-sided rows containing a
                # legitimately null measure/attribute — confirm desired behavior.
                result.data = result.data[result.get_identifiers_names() + not_identifiers].dropna()
        if result.data is not None:
            result.data = result.data.reset_index(drop=True)
        return result
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class Setdiff(Set):
    """VTL set difference: rows of the first operand whose keys are absent from the rest."""

    @staticmethod
    def has_null(row: Any) -> bool:
        # True when any value in the row is null (marks non-matched rows after
        # the left join below).
        return row.isnull().any()

    @classmethod
    def evaluate(cls, operands: List[Dataset]) -> Dataset:
        result = cls.validate(operands)
        all_datapoints = [ds.data for ds in operands]
        for data in all_datapoints:
            if result.data is None:
                # First operand: start from its data as-is.
                result.data = data
            else:
                if data is None:
                    # Subtracting an empty operand removes nothing.
                    data = pd.DataFrame(columns=result.get_identifiers_names())
                # Left join: keys present in `data` get non-null "_y" columns.
                result.data = result.data.merge(data, how="left", on=result.get_identifiers_names())
                # Keep only rows that did NOT match (anti-join via null check).
                # NOTE(review): rows with legitimately null left-hand values are
                # also flagged by has_null — confirm this is the intended semantics.
                if len(result.data) > 0:
                    result.data = result.data[result.data.apply(cls.has_null, axis=1)]

                not_identifiers = result.get_measures_names() + result.get_attributes_names()
                # Restore plain column names: keep the left-hand values, drop suffixes.
                for col in not_identifiers:
                    if col + "_x" in result.data:
                        result.data[col] = result.data[col + "_x"]
                        del result.data[col + "_x"]
                    if col + "_y" in result.data:
                        del result.data[col + "_y"]
                result.data = result.data[result.get_identifiers_names() + not_identifiers]
        if result.data is not None:
            result.data.reset_index(drop=True, inplace=True)
        return result
|