vtlengine-1.4.0rc2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vtlengine/API/_InternalApi.py +791 -0
- vtlengine/API/__init__.py +612 -0
- vtlengine/API/data/schema/external_routines_schema.json +34 -0
- vtlengine/API/data/schema/json_schema_2.1.json +116 -0
- vtlengine/API/data/schema/value_domain_schema.json +97 -0
- vtlengine/AST/ASTComment.py +57 -0
- vtlengine/AST/ASTConstructor.py +598 -0
- vtlengine/AST/ASTConstructorModules/Expr.py +1928 -0
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +995 -0
- vtlengine/AST/ASTConstructorModules/Terminals.py +790 -0
- vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- vtlengine/AST/ASTDataExchange.py +10 -0
- vtlengine/AST/ASTEncoders.py +32 -0
- vtlengine/AST/ASTString.py +675 -0
- vtlengine/AST/ASTTemplate.py +558 -0
- vtlengine/AST/ASTVisitor.py +25 -0
- vtlengine/AST/DAG/__init__.py +479 -0
- vtlengine/AST/DAG/_words.py +10 -0
- vtlengine/AST/Grammar/Vtl.g4 +705 -0
- vtlengine/AST/Grammar/VtlTokens.g4 +409 -0
- vtlengine/AST/Grammar/__init__.py +0 -0
- vtlengine/AST/Grammar/lexer.py +2139 -0
- vtlengine/AST/Grammar/parser.py +16597 -0
- vtlengine/AST/Grammar/tokens.py +169 -0
- vtlengine/AST/VtlVisitor.py +824 -0
- vtlengine/AST/__init__.py +674 -0
- vtlengine/DataTypes/TimeHandling.py +562 -0
- vtlengine/DataTypes/__init__.py +863 -0
- vtlengine/DataTypes/_time_checking.py +135 -0
- vtlengine/Exceptions/__exception_file_generator.py +96 -0
- vtlengine/Exceptions/__init__.py +159 -0
- vtlengine/Exceptions/messages.py +1004 -0
- vtlengine/Interpreter/__init__.py +2048 -0
- vtlengine/Model/__init__.py +501 -0
- vtlengine/Operators/Aggregation.py +357 -0
- vtlengine/Operators/Analytic.py +455 -0
- vtlengine/Operators/Assignment.py +23 -0
- vtlengine/Operators/Boolean.py +106 -0
- vtlengine/Operators/CastOperator.py +451 -0
- vtlengine/Operators/Clause.py +366 -0
- vtlengine/Operators/Comparison.py +488 -0
- vtlengine/Operators/Conditional.py +495 -0
- vtlengine/Operators/General.py +191 -0
- vtlengine/Operators/HROperators.py +254 -0
- vtlengine/Operators/Join.py +447 -0
- vtlengine/Operators/Numeric.py +422 -0
- vtlengine/Operators/RoleSetter.py +77 -0
- vtlengine/Operators/Set.py +176 -0
- vtlengine/Operators/String.py +578 -0
- vtlengine/Operators/Time.py +1144 -0
- vtlengine/Operators/Validation.py +275 -0
- vtlengine/Operators/__init__.py +900 -0
- vtlengine/Utils/__Virtual_Assets.py +34 -0
- vtlengine/Utils/__init__.py +479 -0
- vtlengine/__extras_check.py +17 -0
- vtlengine/__init__.py +27 -0
- vtlengine/files/__init__.py +0 -0
- vtlengine/files/output/__init__.py +35 -0
- vtlengine/files/output/_time_period_representation.py +55 -0
- vtlengine/files/parser/__init__.py +240 -0
- vtlengine/files/parser/_rfc_dialect.py +22 -0
- vtlengine/py.typed +0 -0
- vtlengine-1.4.0rc2.dist-info/METADATA +89 -0
- vtlengine-1.4.0rc2.dist-info/RECORD +66 -0
- vtlengine-1.4.0rc2.dist-info/WHEEL +4 -0
- vtlengine-1.4.0rc2.dist-info/licenses/LICENSE.md +661 -0

vtlengine/Operators/Conditional.py
@@ -0,0 +1,495 @@
from copy import copy
from typing import Any, List, Union

import numpy as np

# if os.environ.get("SPARK", False):
#     import pyspark.pandas as pd
# else:
#     import pandas as pd
import pandas as pd

from vtlengine.DataTypes import (
    COMP_NAME_MAPPING,
    SCALAR_TYPES_CLASS_REVERSE,
    Boolean,
    Null,
    binary_implicit_promotion,
)
from vtlengine.Exceptions import SemanticError
from vtlengine.Model import DataComponent, Dataset, Role, Scalar
from vtlengine.Operators import Binary, Operator
from vtlengine.Utils.__Virtual_Assets import VirtualCounter


class If(Operator):
    """
    If class:
    `If-then-else <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=225&zoom=100,72,142>`_ operator.
    It inherits from Operator, a superclass that provides the generic validate and evaluate class methods.

    Class methods:
        evaluate: Checks that the operation is well constructed, applies the condition and
        returns the result. The result depends on the operand class, DataComponent or Dataset.

        component_level_evaluation: Returns a pandas Series with the values selected by the condition.

        dataset_level_evaluation: Builds the result dataset and checks that its schema allows the
        condition to be applied.

        validate: Class method with two branches, so both DataComponents and Datasets can be
        validated. The DataComponent branch checks that the condition is a boolean Measure;
        the Dataset branch checks that the identifiers are the same in 'if', 'then' and 'else'.
    """  # noqa E501

    @classmethod
    def evaluate(cls, condition: Any, true_branch: Any, false_branch: Any) -> Any:
        result = cls.validate(condition, true_branch, false_branch)
        if not isinstance(result, Scalar):
            if isinstance(condition, DataComponent):
                result.data = cls.component_level_evaluation(condition, true_branch, false_branch)
            if isinstance(condition, Dataset):
                result = cls.dataset_level_evaluation(result, condition, true_branch, false_branch)
        return result

    @classmethod
    def component_level_evaluation(
        cls, condition: DataComponent, true_branch: Any, false_branch: Any
    ) -> Any:
        result = None
        if condition.data is not None:
            if isinstance(true_branch, Scalar):
                true_data = pd.Series(true_branch.value, index=condition.data.index)
            else:
                true_data = true_branch.data.reindex(condition.data.index)
            if isinstance(false_branch, Scalar):
                false_data = pd.Series(false_branch.value, index=condition.data.index)
            else:
                false_data = false_branch.data.reindex(condition.data.index)
            condition.data = condition.data.fillna(False)
            result = np.where(condition.data, true_data, false_data)

        return pd.Series(result, index=condition.data.index)  # type: ignore[union-attr]

    @classmethod
    def dataset_level_evaluation(
        cls, result: Any, condition: Any, true_branch: Any, false_branch: Any
    ) -> Dataset:
        ids = condition.get_identifiers_names()
        condition_measure = condition.get_measures_names()[0]
        # A null condition counts as False: only rows that are explicitly True go to 'then'.
        true_data = condition.data[condition.data[condition_measure] == True]
        false_data = condition.data[condition.data[condition_measure] != True]

        if isinstance(true_branch, Dataset):
            if len(true_data) > 0 and true_branch.data is not None:
                true_data = pd.merge(
                    true_data,
                    true_branch.data,
                    on=ids,
                    how="left",
                    suffixes=("_condition", ""),
                )
            else:
                true_data = pd.DataFrame(columns=true_branch.get_components_names())
        else:
            true_data[condition_measure] = true_data[condition_measure].apply(
                lambda x: true_branch.value
            )
        if isinstance(false_branch, Dataset):
            if len(false_data) > 0 and false_branch.data is not None:
                false_data = pd.merge(
                    false_data,
                    false_branch.data,
                    on=ids,
                    how="left",
                    suffixes=("_condition", ""),
                )
            else:
                false_data = pd.DataFrame(columns=false_branch.get_components_names())
        else:
            false_data[condition_measure] = false_data[condition_measure].apply(
                lambda x: false_branch.value
            )

        result.data = (
            pd.concat([true_data, false_data], ignore_index=True)
            .drop_duplicates()
            .sort_values(by=ids)
        ).reset_index(drop=True)
        if isinstance(result, Dataset):
            drop_columns = [
                column for column in result.data.columns if column not in result.components
            ]
            result.data = result.data.drop(columns=drop_columns)
        if isinstance(true_branch, Scalar) and isinstance(false_branch, Scalar):
            result.get_measures()[0].data_type = true_branch.data_type
            result.get_measures()[0].name = COMP_NAME_MAPPING[true_branch.data_type]
            if result.data is not None:
                result.data = result.data.rename(
                    columns={condition_measure: result.get_measures()[0].name}
                )
        return result

    @classmethod
    def validate(  # noqa: C901
        cls, condition: Any, true_branch: Any, false_branch: Any
    ) -> Union[Scalar, DataComponent, Dataset]:
        nullable = False
        left = true_branch
        right = false_branch
        dataset_name = VirtualCounter._new_ds_name()
        if true_branch.__class__ != false_branch.__class__:
            if (isinstance(true_branch, DataComponent) and isinstance(false_branch, Dataset)) or (
                isinstance(true_branch, Dataset) and isinstance(false_branch, DataComponent)
            ):
                raise ValueError(
                    "If then and else operands cannot be dataset and component respectively"
                )
            if isinstance(true_branch, Scalar):
                left = false_branch
                right = true_branch

        # Datacomponent
        comp_name = VirtualCounter._new_dc_name()
        if isinstance(condition, DataComponent):
            if not condition.data_type == Boolean:
                raise SemanticError(
                    "1-1-9-11",
                    op=cls.op,
                    type=SCALAR_TYPES_CLASS_REVERSE[condition.data_type],
                )

            if (
                isinstance(left, Scalar)
                and isinstance(right, Scalar)
                and (left.data_type == Null or right.data_type == Null)
            ):
                nullable = True
            if isinstance(left, DataComponent) and isinstance(right, DataComponent):
                nullable = left.nullable or right.nullable
            elif isinstance(left, DataComponent):
                nullable = left.nullable or right.data_type == Null
            elif isinstance(right, DataComponent):
                nullable = left.data_type == Null or right.nullable
            return DataComponent(
                name=comp_name,
                data=None,
                data_type=binary_implicit_promotion(left.data_type, right.data_type),
                role=Role.MEASURE,
                nullable=nullable,
            )

        # Dataset
        if isinstance(left, Scalar) and isinstance(right, Scalar):
            raise SemanticError(
                "1-1-9-12", op=cls.op, then_symbol=left.name, else_symbol=right.name
            )
        if isinstance(left, DataComponent):
            raise SemanticError(
                "1-1-9-12", op=cls.op, then_symbol=left.name, else_symbol=right.name
            )
        if isinstance(left, Scalar):
            left.data_type = right.data_type = binary_implicit_promotion(
                left.data_type, right.data_type
            )
            return Dataset(name=dataset_name, components=copy(condition.components), data=None)
        if left.get_identifiers() != condition.get_identifiers():
            raise SemanticError("1-1-9-10", op=cls.op, clause=left.name)
        if isinstance(right, Scalar):
            for component in left.get_measures():
                if component.data_type != right.data_type:
                    component.data_type = binary_implicit_promotion(
                        component.data_type, right.data_type
                    )
        if isinstance(right, Dataset):
            if left.get_identifiers() != condition.get_identifiers():
                raise SemanticError("1-1-9-10", op=cls.op, clause=right.name)
            if left.get_components_names() != right.get_components_names():
                raise SemanticError("1-1-9-13", op=cls.op, then=left.name, else_clause=right.name)
            for component in left.get_measures():
                if component.data_type != right.components[component.name].data_type:
                    component.data_type = right.components[component.name].data_type = (
                        binary_implicit_promotion(
                            component.data_type,
                            right.components[component.name].data_type,
                        )
                    )
        if isinstance(condition, Dataset):
            if len(condition.get_measures()) != 1:
                raise SemanticError("1-1-9-4", op=cls.op, name=condition.name)
            if condition.get_measures()[0].data_type != Boolean:
                raise SemanticError(
                    "1-1-9-5",
                    op=cls.op,
                    type=SCALAR_TYPES_CLASS_REVERSE[condition.get_measures()[0].data_type],
                )
            if left.get_identifiers() != condition.get_identifiers():
                raise SemanticError("1-1-9-6", op=cls.op)
        result_components = {comp_name: copy(comp) for comp_name, comp in left.components.items()}
        return Dataset(name=dataset_name, components=result_components, data=None)
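
As a rough usage sketch (not taken from the package documentation), the component-level path of If can be exercised directly. The Model and Operator classes are the ones defined in this wheel; the `Integer` type is assumed to be exported by vtlengine.DataTypes alongside `Boolean`.

import pandas as pd

from vtlengine.DataTypes import Boolean, Integer  # Integer assumed to be exported here
from vtlengine.Model import DataComponent, Role, Scalar
from vtlengine.Operators.Conditional import If

# Hypothetical boolean condition component; a null condition counts as False.
cond = DataComponent(
    name="cond",
    data=pd.Series([True, False, None]),
    data_type=Boolean,
    role=Role.MEASURE,
    nullable=True,
)
then_op = Scalar(name="one", value=1, data_type=Integer)
else_op = Scalar(name="zero", value=0, data_type=Integer)

result = If.evaluate(cond, then_op, else_op)
print(result.data.tolist())  # [1, 0, 0]: the null row falls into 'else'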


class Nvl(Binary):
    """
    Nvl class:
    `Nvl <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=229&zoom=100,72,370>`_ operator class.
    It has the following class methods:

    Class methods:
        Validate: Class method that validates whether the operation can be performed at
        scalar, DataComponent or Dataset level.
        Evaluate: Evaluates the actual operation, returning the result.
    """  # noqa E501

    @classmethod
    def evaluate(cls, left: Any, right: Any) -> Union[Scalar, DataComponent, Dataset]:
        result = cls.validate(left, right)

        if isinstance(left, Scalar) and isinstance(result, Scalar):
            if left.data_type is Null:
                result.data_type = right.data_type
                result.value = right.value
            elif right.data_type is Null:
                result.data_type = left.data_type
                result.value = left.value
            else:
                result.data_type = left.data_type
                result.value = left.value

        else:
            if not isinstance(result, Scalar):
                if isinstance(right, Scalar):
                    result.data = left.data.fillna(right.value)
                else:
                    result.data = left.data.fillna(right.data)
                if isinstance(result, Dataset):
                    result.data = result.data[result.get_components_names()]
        return result

    @classmethod
    def validate(cls, left: Any, right: Any) -> Union[Scalar, DataComponent, Dataset]:
        dataset_name = VirtualCounter._new_ds_name()
        comp_name = VirtualCounter._new_dc_name()
        result_components = {}
        if isinstance(left, Scalar):
            if not isinstance(right, Scalar):
                raise ValueError(
                    "Nvl operation at scalar level must have scalar "
                    "types on right (applicable) side"
                )
            cls.type_validation(left.data_type, right.data_type)
            return Scalar(name="result", value=None, data_type=left.data_type)
        if isinstance(left, DataComponent):
            if isinstance(right, Dataset):
                raise ValueError(
                    "Nvl operation at component level cannot have "
                    "dataset type on right (applicable) side"
                )
            cls.type_validation(left.data_type, right.data_type)
            return DataComponent(
                name=comp_name,
                data=pd.Series(dtype=object),
                data_type=left.data_type,
                role=Role.MEASURE,
                nullable=False,
            )
        if isinstance(left, Dataset):
            if isinstance(right, DataComponent):
                raise ValueError(
                    "Nvl operation at dataset level cannot have component "
                    "type on right (applicable) side"
                )
            if isinstance(right, Scalar):
                for component in left.get_measures():
                    cls.type_validation(component.data_type, right.data_type)
            if isinstance(right, Dataset):
                for component in left.get_measures():
                    cls.type_validation(
                        component.data_type, right.components[component.name].data_type
                    )
            result_components = {
                comp_name: copy(comp)
                for comp_name, comp in left.components.items()
                if comp.role != Role.ATTRIBUTE
            }
            for comp in result_components.values():
                comp.nullable = False
        return Dataset(name=dataset_name, components=result_components, data=None)
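
At Dataset level, Nvl reduces to pandas fillna over the measures, as the evaluate method above shows. A self-contained sketch of that behaviour with invented data:

import pandas as pd

# Hypothetical measures with nulls.
measures = pd.DataFrame({"Id_1": [1, 2, 3], "Me_1": [10.0, None, 30.0]})

# Scalar right operand: every null is replaced by the scalar value.
print(measures.fillna(0.0))

# Dataset right operand: nulls are filled from the aligned cells of the other dataset.
other = pd.DataFrame({"Id_1": [1, 2, 3], "Me_1": [-1.0, -2.0, -3.0]})
print(measures.fillna(other))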


class Case(Operator):
    @classmethod
    def evaluate(
        cls, conditions: List[Any], thenOps: List[Any], elseOp: Any
    ) -> Union[Scalar, DataComponent, Dataset]:
        result = cls.validate(conditions, thenOps, elseOp)

        for condition in conditions:
            if isinstance(condition, Dataset) and condition.data is not None:
                condition.data.fillna(False, inplace=True)
                condition_measure = condition.get_measures_names()[0]
                if condition.data[condition_measure].dtype != bool:
                    condition.data[condition_measure] = condition.data[condition_measure].astype(
                        bool
                    )
            elif isinstance(condition, DataComponent) and condition.data is not None:
                condition.data.fillna(False, inplace=True)
                if condition.data.dtype != bool:
                    condition.data = condition.data.astype(bool)
            elif isinstance(condition, Scalar) and condition.value is None:
                condition.value = False

        if isinstance(result, Scalar):
            result.value = elseOp.value
            for i in range(len(conditions)):
                if conditions[i].value:
                    result.value = thenOps[i].value

        if isinstance(result, DataComponent):
            full_index = conditions[0].data.index
            result.data = pd.Series(None, index=full_index)

            for i, condition in enumerate(conditions):
                if isinstance(thenOps[i], Scalar):
                    value_series = pd.Series(thenOps[i].value, index=full_index)
                else:
                    value_series = thenOps[i].data.reindex(full_index)
                cond_series = condition.data.reindex(full_index)
                # Parenthesised so the comparison, not the bitwise &, binds first.
                cond_mask = cond_series.notna() & (cond_series == True)
                result_data = result.data.copy()
                result_data[cond_mask] = value_series[cond_mask]
                result.data = result_data

            conditions_stack = [c.data.reindex(full_index).fillna(False) for c in conditions]
            else_cond_mask = (
                ~np.logical_or.reduce(conditions_stack)
                if conditions_stack
                else pd.Series(True, index=full_index)
            )
            if isinstance(elseOp, Scalar):
                else_series = pd.Series(elseOp.value, index=full_index)
            else:
                else_series = elseOp.data.reindex(full_index)
            result.data[else_cond_mask] = else_series[else_cond_mask]

        elif isinstance(result, Dataset):
            identifiers = result.get_identifiers_names()
            columns = [col for col in result.get_components_names() if col not in identifiers]
            result.data = (
                conditions[0].data[identifiers]
                if conditions[0].data is not None
                else pd.DataFrame(columns=identifiers)
            ).copy()

            full_index = result.data.index
            for i in range(len(conditions)):
                condition = conditions[i]
                bool_col = next(x.name for x in condition.get_measures() if x.data_type == Boolean)
                cond_mask = condition.data[bool_col].reindex(full_index).astype(bool)

                if isinstance(thenOps[i], Scalar):
                    for col in columns:
                        result.data.loc[cond_mask, col] = thenOps[i].value
                else:
                    cond_df = thenOps[i].data.reindex(full_index)
                    result.data.loc[cond_mask, columns] = cond_df.loc[cond_mask, columns]

            then_cond_masks = [
                c.data[next(x.name for x in c.get_measures() if x.data_type == Boolean)]
                .reindex(full_index)
                .fillna(False)
                .astype(bool)
                for c in conditions
            ]
            else_cond_mask = (
                ~np.logical_or.reduce(then_cond_masks)
                if then_cond_masks
                else pd.Series(True, index=full_index)
            )

            if isinstance(elseOp, Scalar):
                for col in columns:
                    result.data.loc[else_cond_mask, col] = elseOp.value
            else:
                else_df = elseOp.data.reindex(full_index)
                result.data.loc[else_cond_mask, columns] = else_df.loc[else_cond_mask, columns]

        return result

    @classmethod
    def validate(
        cls, conditions: List[Any], thenOps: List[Any], elseOp: Any
    ) -> Union[Scalar, DataComponent, Dataset]:
        dataset_name = VirtualCounter._new_ds_name()
        comp_name = VirtualCounter._new_dc_name()
        if len(set(map(type, conditions))) > 1:
            raise SemanticError("2-1-9-1", op=cls.op)

        ops = thenOps + [elseOp]
        then_else_types = set(map(type, ops))
        condition_type = type(conditions[0])

        if condition_type is Scalar:
            for condition in conditions:
                if condition.data_type != Boolean:
                    raise SemanticError("2-1-9-2", op=cls.op, name=condition.name)
            if list(then_else_types) != [Scalar]:
                raise SemanticError("2-1-9-3", op=cls.op)

            # The output data type is the data type of the last then operation that has a true
            # condition, defaulting to the data type of the else operation if no condition is true
            output_data_type = elseOp.data_type
            for i in range(len(conditions)):
                if conditions[i].value:
                    output_data_type = thenOps[i].data_type

            return Scalar(
                name="result",
                value=None,
                data_type=output_data_type,
            )

        elif condition_type is DataComponent:
            for condition in conditions:
                if not condition.data_type == Boolean:
                    raise SemanticError("2-1-9-4", op=cls.op, name=condition.name)

            nullable = any(
                (op.nullable if isinstance(op, DataComponent) else op.data_type == Null)
                for op in ops
            )
            data_type = ops[0].data_type
            for op in ops[1:]:
                data_type = binary_implicit_promotion(data_type, op.data_type)

            return DataComponent(
                name=comp_name,
                data=None,
                data_type=data_type,
                role=Role.MEASURE,
                nullable=nullable,
            )

        # Dataset
        for condition in conditions:
            if len(condition.get_measures_names()) != 1:
                raise SemanticError("1-1-1-4", op=cls.op)
            if condition.get_measures()[0].data_type != Boolean:
                raise SemanticError("2-1-9-5", op=cls.op, name=condition.name)

        if Dataset not in then_else_types:
            raise SemanticError("2-1-9-6", op=cls.op)

        components = next(op for op in ops if isinstance(op, Dataset)).components
        comp_names = [comp.name for comp in components.values()]
        for op in ops:
            if isinstance(op, Dataset) and op.get_components_names() != comp_names:
                raise SemanticError("2-1-9-7", op=cls.op)

        return Dataset(name=dataset_name, components=components, data=None)
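
In both the DataComponent and Dataset branches of Case.evaluate, the else mask is the complement of the union of all condition masks. A minimal standalone sketch of that computation:

import numpy as np
import pandas as pd

# Hypothetical per-condition boolean masks, already reindexed and null-filled.
masks = [
    pd.Series([True, False, False, False]),
    pd.Series([False, True, False, False]),
]

# A row falls through to 'else' when no condition matched it.
else_mask = ~np.logical_or.reduce(masks) if masks else pd.Series(True, index=range(4))
print(else_mask)  # [False False  True  True]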

vtlengine/Operators/General.py
@@ -0,0 +1,191 @@
import re
from typing import Any, Dict, List, Union

import duckdb
import pandas as pd

from vtlengine.DataTypes import COMP_NAME_MAPPING
from vtlengine.Exceptions import SemanticError
from vtlengine.Model import Component, DataComponent, Dataset, ExternalRoutine, Role
from vtlengine.Operators import Binary, Unary
from vtlengine.Utils.__Virtual_Assets import VirtualCounter


class Membership(Binary):
    """Membership operator class.

    It inherits from the Binary class and has the following class methods:

    Class methods:
        Validate: Checks that the component named by the string right operand actually
        belongs to the Dataset.
        Evaluate: Runs validate and returns the resulting dataset with its data attached.
    """

    @classmethod
    def validate(cls, left_operand: Any, right_operand: Any) -> Dataset:
        dataset_name = VirtualCounter._new_ds_name()
        if right_operand not in left_operand.components:
            raise SemanticError(
                "1-1-1-10",
                op=cls.op,
                comp_name=right_operand,
                dataset_name=left_operand.name,
            )

        component = left_operand.components[right_operand]
        if component.role in (Role.IDENTIFIER, Role.ATTRIBUTE):
            right_operand = COMP_NAME_MAPPING[component.data_type]
            left_operand.components[right_operand] = Component(
                name=right_operand,
                data_type=component.data_type,
                role=Role.MEASURE,
                nullable=component.nullable,
            )
            if left_operand.data is not None:
                left_operand.data[right_operand] = left_operand.data[component.name]
        result_components = {
            name: comp
            for name, comp in left_operand.components.items()
            if comp.role == Role.IDENTIFIER or comp.name == right_operand
        }
        result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
        return result_dataset

    @classmethod
    def evaluate(
        cls,
        left_operand: Dataset,
        right_operand: str,
        is_from_component_assignment: bool = False,
    ) -> Union[DataComponent, Dataset]:
        result_dataset = cls.validate(left_operand, right_operand)
        if left_operand.data is not None:
            if is_from_component_assignment:
                return DataComponent(
                    name=right_operand,
                    data_type=left_operand.components[right_operand].data_type,
                    role=Role.MEASURE,
                    nullable=left_operand.components[right_operand].nullable,
                    data=left_operand.data[right_operand],
                )
            result_dataset.data = left_operand.data[list(result_dataset.components.keys())]
        return result_dataset
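
In VTL, membership (DS_1#Me_1) keeps the identifiers plus the referenced component; in pandas terms the evaluate step above is a plain column projection. A sketch with invented data:

import pandas as pd

# Hypothetical dataset: one identifier and two measures.
data = pd.DataFrame({"Id_1": [1, 2], "Me_1": [10, 20], "Me_2": [0.5, 0.7]})

# Membership on Me_1 projects the identifiers plus the selected component,
# mirroring left_operand.data[list(result_dataset.components.keys())] above.
print(data[["Id_1", "Me_1"]])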


class Alias(Binary):
    """Alias operator class.
    It inherits from the Binary class and has the following class methods:

    Class methods:
        Validate: Ensures the name given in the right operand does not clash with a
        component name of the Dataset.
        Evaluate: Returns the validated dataset carrying the data of the left operand.
    """

    @classmethod
    def validate(cls, left_operand: Dataset, right_operand: Union[str, Dataset]) -> Dataset:
        new_name = right_operand if isinstance(right_operand, str) else right_operand.name
        if new_name != left_operand.name and new_name in left_operand.get_components_names():
            raise SemanticError("1-3-1", alias=new_name)
        return Dataset(name=new_name, components=left_operand.components, data=None)

    @classmethod
    def evaluate(cls, left_operand: Dataset, right_operand: Union[str, Dataset]) -> Dataset:
        result = cls.validate(left_operand, right_operand)
        result.data = left_operand.data
        return result


class Eval(Unary):
    """Eval operator class.
    It inherits from the Unary class and has the following class methods:

    Class methods:
        Validate: Checks that every dataset named by the external routine is present in
        the Eval operands and that the routine's output columns match the declared
        output Dataset.
        Evaluate: Executes the external routine's SQL query over the operand data and
        returns the output Dataset.
    """

    @staticmethod
    def _execute_query(
        query: str, dataset_names: List[str], data: Dict[str, pd.DataFrame]
    ) -> pd.DataFrame:
        query = re.sub(r'"([^"]*)"', r"'\1'", query)
        for forbidden in ["INSTALL", "LOAD"]:
            if re.search(rf"\b{forbidden}\b", query, re.IGNORECASE):
                raise Exception(f"Query contains forbidden command: {forbidden}")
        if re.search(r"FROM\s+'https?://", query, re.IGNORECASE):
            raise Exception("Query contains forbidden URL in FROM clause")
        try:
            conn = duckdb.connect(database=":memory:", read_only=False)
            conn.execute("SET enable_external_access = false")
            conn.execute("SET allow_unsigned_extensions = false")
            conn.execute("SET allow_community_extensions = false")
            conn.execute("SET autoinstall_known_extensions = false")
            conn.execute("SET autoload_known_extensions = false")
            conn.execute("SET lock_configuration = true")

            try:
                for ds_name in dataset_names:
                    df = data[ds_name]
                    conn.register(ds_name, df)
                df_result = conn.execute(query).fetchdf()
                conn.close()
            except Exception as e:
                conn.close()
                raise Exception(f"Error executing SQL query: {e}")
        except Exception as e:
            raise Exception(f"Error connecting to DuckDB in memory: {e}")
        return df_result

    @classmethod
    def validate(  # type: ignore[override]
        cls,
        operands: Dict[str, Dataset],
        external_routine: ExternalRoutine,
        output: Dataset,
    ) -> Dataset:
        empty_data_dict: Dict[str, pd.DataFrame] = {}
        for ds_name in external_routine.dataset_names:
            if ds_name not in operands:
                raise ValueError(
                    f"External Routine dataset {ds_name} is not present in Eval operands"
                )
            empty_data = pd.DataFrame(
                columns=[comp.name for comp in operands[ds_name].components.values()]
            )
            empty_data_dict[ds_name] = empty_data

        df = cls._execute_query(
            external_routine.query, external_routine.dataset_names, empty_data_dict
        )
        component_names = df.columns.tolist()
        for comp_name in component_names:
            if comp_name not in output.components:
                raise SemanticError(
                    "1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=output.name
                )

        for comp_name in output.components:
            if comp_name not in component_names:
                raise ValueError(f"Component {comp_name} not found in External Routine result")

        output.name = external_routine.name

        return output

    @classmethod
    def evaluate(  # type: ignore[override]
        cls,
        operands: Dict[str, Dataset],
        external_routine: ExternalRoutine,
        output: Dataset,
    ) -> Dataset:
        result: Dataset = cls.validate(operands, external_routine, output)
        operands_data_dict = {ds_name: operands[ds_name].data for ds_name in operands}
        result.data = cls._execute_query(
            external_routine.query,
            external_routine.dataset_names,
            operands_data_dict,  # type: ignore[arg-type]
        )
        return result
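
The sandboxing in _execute_query relies only on stock DuckDB settings. A standalone sketch of the same register-and-query flow, with an invented table and query:

import duckdb
import pandas as pd

conn = duckdb.connect(database=":memory:", read_only=False)
# Same hardening as _execute_query: no external access, configuration locked.
conn.execute("SET enable_external_access = false")
conn.execute("SET lock_configuration = true")

# Registered DataFrames become queryable by name, which is how the external
# routine's SQL can reference VTL datasets as plain tables.
conn.register("DS_1", pd.DataFrame({"Id_1": [1, 2], "Me_1": [10, 20]}))
df = conn.execute("SELECT Id_1, Me_1 * 2 AS Me_1 FROM DS_1").fetchdf()
conn.close()
print(df)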