vtlengine 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +153 -100
- vtlengine/API/__init__.py +109 -67
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +8 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/lexer.py +732 -142
- vtlengine/AST/Grammar/parser.py +2188 -826
- vtlengine/AST/Grammar/tokens.py +128 -128
- vtlengine/AST/VtlVisitor.py +7 -4
- vtlengine/AST/__init__.py +22 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +194 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +52 -27
- vtlengine/Exceptions/messages.py +134 -62
- vtlengine/Interpreter/__init__.py +781 -487
- vtlengine/Model/__init__.py +165 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +115 -59
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +149 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +89 -44
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +334 -216
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +195 -40
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +77 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +46 -37
- vtlengine-1.0.1.dist-info/METADATA +236 -0
- vtlengine-1.0.1.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
|
@@ -1,31 +1,37 @@
|
|
|
1
|
-
import os
|
|
2
1
|
from copy import copy
|
|
3
|
-
from typing import Union
|
|
2
|
+
from typing import Union, Any
|
|
4
3
|
|
|
5
4
|
import numpy as np
|
|
6
5
|
|
|
7
|
-
from vtlengine.DataTypes import
|
|
8
|
-
|
|
6
|
+
from vtlengine.DataTypes import (
|
|
7
|
+
Boolean,
|
|
8
|
+
COMP_NAME_MAPPING,
|
|
9
|
+
binary_implicit_promotion,
|
|
10
|
+
SCALAR_TYPES_CLASS_REVERSE,
|
|
11
|
+
Null,
|
|
12
|
+
)
|
|
9
13
|
from vtlengine.Operators import Operator, Binary
|
|
10
14
|
|
|
11
15
|
from vtlengine.Exceptions import SemanticError
|
|
12
16
|
from vtlengine.Model import Scalar, DataComponent, Dataset, Role
|
|
13
17
|
|
|
14
|
-
if os.environ.get("SPARK", False):
|
|
15
|
-
|
|
16
|
-
else:
|
|
17
|
-
|
|
18
|
+
# if os.environ.get("SPARK", False):
|
|
19
|
+
# import pyspark.pandas as pd
|
|
20
|
+
# else:
|
|
21
|
+
# import pandas as pd
|
|
22
|
+
import pandas as pd
|
|
18
23
|
|
|
19
24
|
|
|
20
25
|
class If(Operator):
|
|
21
26
|
"""
|
|
22
27
|
If class:
|
|
23
|
-
`If-then-else <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=225&zoom=100,72,142>`_ operator
|
|
28
|
+
`If-then-else <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=225&zoom=100,72,142>`_ operator # noqa E501
|
|
24
29
|
inherits from Operator, a superclass that contains general validate and evaluate class methods.
|
|
25
30
|
It has the following class methods:
|
|
26
31
|
Class methods:
|
|
27
|
-
evaluate: Evaluates if the operation is well constructed, checking the actual condition and
|
|
28
|
-
|
|
32
|
+
evaluate: Evaluates if the operation is well constructed, checking the actual condition and
|
|
33
|
+
dropping a boolean result.
|
|
34
|
+
The result will depend on the data class, such as datacomponent and dataset.
|
|
29
35
|
|
|
30
36
|
component_level_evaluation: Returns a pandas dataframe with data to set the condition
|
|
31
37
|
|
|
@@ -37,79 +43,97 @@ class If(Operator):
|
|
|
37
43
|
"""
|
|
38
44
|
|
|
39
45
|
@classmethod
|
|
40
|
-
def evaluate(cls, condition, true_branch, false_branch):
|
|
46
|
+
def evaluate(cls, condition: Any, true_branch: Any, false_branch: Any) -> Any:
|
|
41
47
|
result = cls.validate(condition, true_branch, false_branch)
|
|
42
|
-
if isinstance(
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
48
|
+
if not isinstance(result, Scalar):
|
|
49
|
+
if isinstance(condition, DataComponent):
|
|
50
|
+
result.data = cls.component_level_evaluation(condition, true_branch, false_branch)
|
|
51
|
+
if isinstance(condition, Dataset):
|
|
52
|
+
result = cls.dataset_level_evaluation(result, condition, true_branch, false_branch)
|
|
46
53
|
return result
|
|
47
54
|
|
|
48
55
|
@classmethod
|
|
49
|
-
def component_level_evaluation(
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
56
|
+
def component_level_evaluation(
|
|
57
|
+
cls, condition: DataComponent, true_branch: Any, false_branch: Any
|
|
58
|
+
) -> Any:
|
|
59
|
+
result = None
|
|
60
|
+
if condition.data is not None:
|
|
61
|
+
if isinstance(true_branch, Scalar):
|
|
62
|
+
true_data = pd.Series(true_branch.value, index=condition.data.index)
|
|
63
|
+
else:
|
|
64
|
+
true_data = true_branch.data.reindex(condition.data.index)
|
|
65
|
+
if isinstance(false_branch, Scalar):
|
|
66
|
+
false_data = pd.Series(false_branch.value, index=condition.data.index)
|
|
67
|
+
else:
|
|
68
|
+
false_data = false_branch.data.reindex(condition.data.index)
|
|
69
|
+
result = np.where(condition.data, true_data, false_data)
|
|
62
70
|
return pd.Series(result, index=condition.data.index)
|
|
63
71
|
|
|
64
72
|
@classmethod
|
|
65
|
-
def dataset_level_evaluation(
|
|
73
|
+
def dataset_level_evaluation(
|
|
74
|
+
cls, result: Any, condition: Any, true_branch: Any, false_branch: Any
|
|
75
|
+
) -> Dataset:
|
|
66
76
|
ids = condition.get_identifiers_names()
|
|
67
77
|
condition_measure = condition.get_measures_names()[0]
|
|
68
78
|
true_data = condition.data[condition.data[condition_measure] == True]
|
|
69
79
|
false_data = condition.data[condition.data[condition_measure] != True].fillna(False)
|
|
70
80
|
|
|
71
81
|
if isinstance(true_branch, Dataset):
|
|
72
|
-
if len(true_data) > 0:
|
|
73
|
-
true_data = pd.merge(
|
|
74
|
-
|
|
82
|
+
if len(true_data) > 0 and true_branch.data is not None:
|
|
83
|
+
true_data = pd.merge(
|
|
84
|
+
true_data, true_branch.data, on=ids, how="right", suffixes=("_condition", "")
|
|
85
|
+
)
|
|
75
86
|
else:
|
|
76
87
|
true_data = pd.DataFrame(columns=true_branch.get_components_names())
|
|
77
88
|
else:
|
|
78
89
|
true_data[condition_measure] = true_data[condition_measure].apply(
|
|
79
|
-
lambda x: true_branch.value
|
|
90
|
+
lambda x: true_branch.value
|
|
91
|
+
)
|
|
80
92
|
if isinstance(false_branch, Dataset):
|
|
81
|
-
if len(false_data) > 0:
|
|
82
|
-
false_data = pd.merge(
|
|
83
|
-
|
|
93
|
+
if len(false_data) > 0 and false_branch.data is not None:
|
|
94
|
+
false_data = pd.merge(
|
|
95
|
+
false_data, false_branch.data, on=ids, how="right", suffixes=("_condition", "")
|
|
96
|
+
)
|
|
84
97
|
else:
|
|
85
98
|
false_data = pd.DataFrame(columns=false_branch.get_components_names())
|
|
86
99
|
else:
|
|
87
100
|
false_data[condition_measure] = false_data[condition_measure].apply(
|
|
88
|
-
lambda x: false_branch.value
|
|
101
|
+
lambda x: false_branch.value
|
|
102
|
+
)
|
|
89
103
|
|
|
90
|
-
result.data =
|
|
91
|
-
|
|
104
|
+
result.data = (
|
|
105
|
+
pd.concat([true_data, false_data], ignore_index=True)
|
|
106
|
+
.drop_duplicates()
|
|
107
|
+
.sort_values(by=ids)
|
|
108
|
+
)
|
|
92
109
|
if isinstance(result, Dataset):
|
|
93
|
-
drop_columns = [
|
|
94
|
-
|
|
110
|
+
drop_columns = [
|
|
111
|
+
column for column in result.data.columns if column not in result.components.keys()
|
|
112
|
+
]
|
|
95
113
|
result.data = result.data.dropna(subset=drop_columns).drop(columns=drop_columns)
|
|
96
114
|
if isinstance(true_branch, Scalar) and isinstance(false_branch, Scalar):
|
|
97
115
|
result.get_measures()[0].data_type = true_branch.data_type
|
|
98
116
|
result.get_measures()[0].name = COMP_NAME_MAPPING[true_branch.data_type]
|
|
99
|
-
result.data
|
|
100
|
-
|
|
117
|
+
if result.data is not None:
|
|
118
|
+
result.data = result.data.rename(
|
|
119
|
+
columns={condition_measure: result.get_measures()[0].name}
|
|
120
|
+
)
|
|
101
121
|
return result
|
|
102
122
|
|
|
103
123
|
@classmethod
|
|
104
|
-
def validate(
|
|
124
|
+
def validate( # noqa: C901
|
|
125
|
+
cls, condition: Any, true_branch: Any, false_branch: Any
|
|
126
|
+
) -> Union[Scalar, DataComponent, Dataset]:
|
|
105
127
|
nullable = False
|
|
106
128
|
left = true_branch
|
|
107
129
|
right = false_branch
|
|
108
130
|
if true_branch.__class__ != false_branch.__class__:
|
|
109
|
-
if (isinstance(true_branch, DataComponent) and isinstance(false_branch, Dataset)) or
|
|
110
|
-
|
|
131
|
+
if (isinstance(true_branch, DataComponent) and isinstance(false_branch, Dataset)) or (
|
|
132
|
+
isinstance(true_branch, Dataset) and isinstance(false_branch, DataComponent)
|
|
133
|
+
):
|
|
111
134
|
raise ValueError(
|
|
112
|
-
"If then and else operands cannot be dataset and component respectively"
|
|
135
|
+
"If then and else operands cannot be dataset and component respectively"
|
|
136
|
+
)
|
|
113
137
|
if isinstance(true_branch, Scalar):
|
|
114
138
|
left = false_branch
|
|
115
139
|
right = true_branch
|
|
@@ -117,42 +141,48 @@ class If(Operator):
|
|
|
117
141
|
# Datacomponent
|
|
118
142
|
if isinstance(condition, DataComponent):
|
|
119
143
|
if not condition.data_type == Boolean:
|
|
120
|
-
raise SemanticError(
|
|
121
|
-
|
|
144
|
+
raise SemanticError(
|
|
145
|
+
"1-1-9-11", op=cls.op, type=SCALAR_TYPES_CLASS_REVERSE[condition.data_type]
|
|
146
|
+
)
|
|
122
147
|
if not isinstance(left, Scalar) or not isinstance(right, Scalar):
|
|
123
148
|
nullable = condition.nullable
|
|
124
149
|
else:
|
|
125
|
-
if
|
|
126
|
-
nullable = True
|
|
127
|
-
if isinstance(right, Scalar) and right.data_type == Null:
|
|
150
|
+
if left.data_type == Null or right.data_type == Null:
|
|
128
151
|
nullable = True
|
|
129
152
|
if isinstance(left, DataComponent):
|
|
130
153
|
nullable |= left.nullable
|
|
131
154
|
if isinstance(right, DataComponent):
|
|
132
155
|
nullable |= right.nullable
|
|
133
|
-
return DataComponent(
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
156
|
+
return DataComponent(
|
|
157
|
+
name="result",
|
|
158
|
+
data=None,
|
|
159
|
+
data_type=binary_implicit_promotion(left.data_type, right.data_type),
|
|
160
|
+
role=Role.MEASURE,
|
|
161
|
+
nullable=nullable,
|
|
162
|
+
)
|
|
137
163
|
|
|
138
164
|
# Dataset
|
|
139
165
|
if isinstance(left, Scalar) and isinstance(right, Scalar):
|
|
140
|
-
raise SemanticError(
|
|
141
|
-
|
|
166
|
+
raise SemanticError(
|
|
167
|
+
"1-1-9-12", op=cls.op, then_symbol=left.name, else_symbol=right.name
|
|
168
|
+
)
|
|
142
169
|
if isinstance(left, DataComponent):
|
|
143
|
-
raise SemanticError(
|
|
144
|
-
|
|
170
|
+
raise SemanticError(
|
|
171
|
+
"1-1-9-12", op=cls.op, then_symbol=left.name, else_symbol=right.name
|
|
172
|
+
)
|
|
145
173
|
if isinstance(left, Scalar):
|
|
146
|
-
left.data_type = right.data_type = binary_implicit_promotion(
|
|
147
|
-
|
|
148
|
-
|
|
174
|
+
left.data_type = right.data_type = binary_implicit_promotion(
|
|
175
|
+
left.data_type, right.data_type
|
|
176
|
+
)
|
|
177
|
+
return Dataset(name="result", components=copy(condition.components), data=None)
|
|
149
178
|
if left.get_identifiers() != condition.get_identifiers():
|
|
150
179
|
raise SemanticError("1-1-9-10", op=cls.op, clause=left.name)
|
|
151
180
|
if isinstance(right, Scalar):
|
|
152
181
|
for component in left.get_measures():
|
|
153
182
|
if component.data_type != right.data_type:
|
|
154
|
-
component.data_type = binary_implicit_promotion(
|
|
155
|
-
|
|
183
|
+
component.data_type = binary_implicit_promotion(
|
|
184
|
+
component.data_type, right.data_type
|
|
185
|
+
)
|
|
156
186
|
if isinstance(right, Dataset):
|
|
157
187
|
if left.get_identifiers() != condition.get_identifiers():
|
|
158
188
|
raise SemanticError("1-1-9-10", op=cls.op, clause=right.name)
|
|
@@ -160,80 +190,101 @@ class If(Operator):
|
|
|
160
190
|
raise SemanticError("1-1-9-13", op=cls.op, then=left.name, else_clause=right.name)
|
|
161
191
|
for component in left.get_measures():
|
|
162
192
|
if component.data_type != right.components[component.name].data_type:
|
|
163
|
-
component.data_type = right.components[component.name].data_type =
|
|
164
|
-
binary_implicit_promotion(
|
|
165
|
-
|
|
193
|
+
component.data_type = right.components[component.name].data_type = (
|
|
194
|
+
binary_implicit_promotion(
|
|
195
|
+
component.data_type, right.components[component.name].data_type
|
|
196
|
+
)
|
|
197
|
+
)
|
|
166
198
|
if isinstance(condition, Dataset):
|
|
167
199
|
if len(condition.get_measures()) != 1:
|
|
168
200
|
raise SemanticError("1-1-9-4", op=cls.op, name=condition.name)
|
|
169
201
|
if condition.get_measures()[0].data_type != Boolean:
|
|
170
|
-
raise SemanticError(
|
|
171
|
-
|
|
202
|
+
raise SemanticError(
|
|
203
|
+
"1-1-9-5",
|
|
204
|
+
op=cls.op,
|
|
205
|
+
type=SCALAR_TYPES_CLASS_REVERSE[condition.get_measures()[0].data_type],
|
|
206
|
+
)
|
|
172
207
|
if left.get_identifiers() != condition.get_identifiers():
|
|
173
208
|
raise SemanticError("1-1-9-6", op=cls.op)
|
|
174
209
|
result_components = {comp_name: copy(comp) for comp_name, comp in left.components.items()}
|
|
175
|
-
return Dataset(name=
|
|
210
|
+
return Dataset(name="result", components=result_components, data=None)
|
|
176
211
|
|
|
177
212
|
|
|
178
213
|
class Nvl(Binary):
|
|
179
214
|
"""
|
|
180
215
|
Null class:
|
|
181
|
-
`Nvl <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=229&zoom=100,72,370>`_operator class.
|
|
216
|
+
`Nvl <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=229&zoom=100,72,370>`_operator class. # noqa E501
|
|
182
217
|
It has the following class methods:
|
|
183
218
|
|
|
184
219
|
Class methods:
|
|
185
|
-
Validate: Class method that validates if the operation at scalar,
|
|
220
|
+
Validate: Class method that validates if the operation at scalar,
|
|
221
|
+
datacomponent or dataset level can be performed.
|
|
186
222
|
Evaluate: Evaluates the actual operation, returning the result.
|
|
187
223
|
"""
|
|
188
224
|
|
|
189
225
|
@classmethod
|
|
190
|
-
def evaluate(cls, left, right):
|
|
226
|
+
def evaluate(cls, left: Any, right: Any) -> Union[Scalar, DataComponent, Dataset]:
|
|
191
227
|
result = cls.validate(left, right)
|
|
192
228
|
|
|
193
|
-
if isinstance(left, Scalar):
|
|
194
|
-
if pd.isnull(left):
|
|
229
|
+
if isinstance(left, Scalar) and isinstance(result, Scalar):
|
|
230
|
+
if pd.isnull(left): # type: ignore[call-overload]
|
|
195
231
|
result.value = right.value
|
|
196
232
|
else:
|
|
197
233
|
result.value = left.value
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
234
|
+
else:
|
|
235
|
+
if not isinstance(result, Scalar):
|
|
236
|
+
if isinstance(right, Scalar):
|
|
237
|
+
result.data = left.data.fillna(right.value)
|
|
238
|
+
else:
|
|
239
|
+
result.data = left.data.fillna(right.data)
|
|
240
|
+
if isinstance(result, Dataset):
|
|
241
|
+
result.data = result.data[result.get_components_names()]
|
|
205
242
|
return result
|
|
206
243
|
|
|
207
244
|
@classmethod
|
|
208
|
-
def validate(cls, left, right) -> Scalar
|
|
245
|
+
def validate(cls, left: Any, right: Any) -> Union[Scalar, DataComponent, Dataset]:
|
|
246
|
+
result_components = {}
|
|
209
247
|
if isinstance(left, Scalar):
|
|
210
248
|
if not isinstance(right, Scalar):
|
|
211
249
|
raise ValueError(
|
|
212
|
-
"Nvl operation at scalar level must have scalar
|
|
250
|
+
"Nvl operation at scalar level must have scalar "
|
|
251
|
+
"types on right (applicable) side"
|
|
252
|
+
)
|
|
213
253
|
cls.type_validation(left.data_type, right.data_type)
|
|
214
|
-
return Scalar(name=
|
|
254
|
+
return Scalar(name="result", value=None, data_type=left.data_type)
|
|
215
255
|
if isinstance(left, DataComponent):
|
|
216
256
|
if isinstance(right, Dataset):
|
|
217
257
|
raise ValueError(
|
|
218
|
-
"Nvl operation at component level cannot have
|
|
258
|
+
"Nvl operation at component level cannot have "
|
|
259
|
+
"dataset type on right (applicable) side"
|
|
260
|
+
)
|
|
219
261
|
cls.type_validation(left.data_type, right.data_type)
|
|
220
|
-
return DataComponent(
|
|
221
|
-
|
|
222
|
-
|
|
262
|
+
return DataComponent(
|
|
263
|
+
name="result",
|
|
264
|
+
data=pd.Series(dtype=object),
|
|
265
|
+
data_type=left.data_type,
|
|
266
|
+
role=Role.MEASURE,
|
|
267
|
+
nullable=False,
|
|
268
|
+
)
|
|
223
269
|
if isinstance(left, Dataset):
|
|
224
270
|
if isinstance(right, DataComponent):
|
|
225
271
|
raise ValueError(
|
|
226
|
-
"Nvl operation at dataset level cannot have component
|
|
272
|
+
"Nvl operation at dataset level cannot have component "
|
|
273
|
+
"type on right (applicable) side"
|
|
274
|
+
)
|
|
227
275
|
if isinstance(right, Scalar):
|
|
228
276
|
for component in left.get_measures():
|
|
229
277
|
cls.type_validation(component.data_type, right.data_type)
|
|
230
278
|
if isinstance(right, Dataset):
|
|
231
279
|
for component in left.get_measures():
|
|
232
|
-
cls.type_validation(
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
280
|
+
cls.type_validation(
|
|
281
|
+
component.data_type, right.components[component.name].data_type
|
|
282
|
+
)
|
|
283
|
+
result_components = {
|
|
284
|
+
comp_name: copy(comp)
|
|
285
|
+
for comp_name, comp in left.components.items()
|
|
286
|
+
if comp.role != Role.ATTRIBUTE
|
|
287
|
+
}
|
|
237
288
|
for comp in result_components.values():
|
|
238
289
|
comp.nullable = False
|
|
239
|
-
|
|
290
|
+
return Dataset(name="result", components=result_components, data=None)
|
vtlengine/Operators/General.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Dict, List
|
|
1
|
+
from typing import Dict, List, Any, Union
|
|
2
2
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
import sqlite3
|
|
@@ -20,38 +20,47 @@ class Membership(Binary):
|
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
@classmethod
|
|
23
|
-
def validate(cls, left_operand:
|
|
23
|
+
def validate(cls, left_operand: Any, right_operand: Any) -> Dataset:
|
|
24
24
|
if right_operand not in left_operand.components:
|
|
25
|
-
raise SemanticError(
|
|
26
|
-
|
|
25
|
+
raise SemanticError(
|
|
26
|
+
"1-1-1-10", op=cls.op, comp_name=right_operand, dataset_name=left_operand.name
|
|
27
|
+
)
|
|
27
28
|
|
|
28
29
|
component = left_operand.components[right_operand]
|
|
29
30
|
if component.role in (Role.IDENTIFIER, Role.ATTRIBUTE):
|
|
30
31
|
right_operand = COMP_NAME_MAPPING[component.data_type]
|
|
31
|
-
left_operand.components[right_operand] = Component(
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
left_operand.components[right_operand] = Component(
|
|
33
|
+
name=right_operand,
|
|
34
|
+
data_type=component.data_type,
|
|
35
|
+
role=Role.MEASURE,
|
|
36
|
+
nullable=component.nullable,
|
|
37
|
+
)
|
|
35
38
|
if left_operand.data is not None:
|
|
36
39
|
left_operand.data[right_operand] = left_operand.data[component.name]
|
|
37
40
|
left_operand.data[right_operand] = left_operand.data[component.name]
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
+
result_components = {
|
|
42
|
+
name: comp
|
|
43
|
+
for name, comp in left_operand.components.items()
|
|
44
|
+
if comp.role == Role.IDENTIFIER or comp.name == right_operand
|
|
45
|
+
}
|
|
41
46
|
result_dataset = Dataset(name="result", components=result_components, data=None)
|
|
42
47
|
return result_dataset
|
|
43
48
|
|
|
44
49
|
@classmethod
|
|
45
|
-
def evaluate(
|
|
46
|
-
|
|
50
|
+
def evaluate(
|
|
51
|
+
cls, left_operand: Dataset, right_operand: str, is_from_component_assignment: bool = False
|
|
52
|
+
) -> Union[DataComponent, Dataset]:
|
|
47
53
|
result_dataset = cls.validate(left_operand, right_operand)
|
|
48
|
-
if
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
54
|
+
if left_operand.data is not None:
|
|
55
|
+
if is_from_component_assignment:
|
|
56
|
+
return DataComponent(
|
|
57
|
+
name=right_operand,
|
|
58
|
+
data_type=left_operand.components[right_operand].data_type,
|
|
59
|
+
role=Role.MEASURE,
|
|
60
|
+
nullable=left_operand.components[right_operand].nullable,
|
|
61
|
+
data=left_operand.data[right_operand],
|
|
62
|
+
)
|
|
63
|
+
result_dataset.data = left_operand.data[list(result_dataset.components.keys())]
|
|
55
64
|
return result_dataset
|
|
56
65
|
|
|
57
66
|
|
|
@@ -60,19 +69,19 @@ class Alias(Binary):
|
|
|
60
69
|
It inherits from Binary class, and has the following class methods:
|
|
61
70
|
|
|
62
71
|
Class methods:
|
|
63
|
-
Validate: Ensures the name given in the right operand is different from the
|
|
64
|
-
Evaluate: Checks if the data between both operators are the same.
|
|
72
|
+
Validate: Ensures the name given in the right operand is different from the
|
|
73
|
+
name of the Dataset. Evaluate: Checks if the data between both operators are the same.
|
|
65
74
|
"""
|
|
66
75
|
|
|
67
76
|
@classmethod
|
|
68
|
-
def validate(cls, left_operand: Dataset, right_operand: str):
|
|
77
|
+
def validate(cls, left_operand: Dataset, right_operand: Union[str, Dataset]) -> Dataset:
|
|
69
78
|
new_name = right_operand if isinstance(right_operand, str) else right_operand.name
|
|
70
79
|
if new_name != left_operand.name and new_name in left_operand.get_components_names():
|
|
71
80
|
raise SemanticError("1-3-1", alias=new_name)
|
|
72
81
|
return Dataset(name=new_name, components=left_operand.components, data=None)
|
|
73
82
|
|
|
74
83
|
@classmethod
|
|
75
|
-
def evaluate(cls, left_operand: Dataset, right_operand: str) -> Dataset:
|
|
84
|
+
def evaluate(cls, left_operand: Dataset, right_operand: Union[str, Dataset]) -> Dataset:
|
|
76
85
|
result = cls.validate(left_operand, right_operand)
|
|
77
86
|
result.data = left_operand.data
|
|
78
87
|
return result
|
|
@@ -83,14 +92,16 @@ class Eval(Unary):
|
|
|
83
92
|
It inherits from Unary class and has the following class methods
|
|
84
93
|
|
|
85
94
|
Class methods:
|
|
86
|
-
Validate: checks if the external routine name is the same as the operand name,
|
|
95
|
+
Validate: checks if the external routine name is the same as the operand name,
|
|
96
|
+
which must be a Dataset.
|
|
87
97
|
Evaluate: Checks if the operand and the output is actually a Dataset.
|
|
88
98
|
|
|
89
99
|
"""
|
|
90
100
|
|
|
91
101
|
@staticmethod
|
|
92
|
-
def _execute_query(
|
|
93
|
-
|
|
102
|
+
def _execute_query(
|
|
103
|
+
query: str, dataset_names: List[str], data: Dict[str, pd.DataFrame]
|
|
104
|
+
) -> pd.DataFrame:
|
|
94
105
|
try:
|
|
95
106
|
conn = sqlite3.connect(":memory:")
|
|
96
107
|
try:
|
|
@@ -111,25 +122,33 @@ class Eval(Unary):
|
|
|
111
122
|
return df_result
|
|
112
123
|
|
|
113
124
|
@classmethod
|
|
114
|
-
def validate(
|
|
115
|
-
|
|
125
|
+
def validate( # type: ignore[override]
|
|
126
|
+
cls,
|
|
127
|
+
operands: Dict[str, Dataset],
|
|
128
|
+
external_routine: ExternalRoutine,
|
|
129
|
+
output: Dataset,
|
|
130
|
+
) -> Dataset:
|
|
116
131
|
|
|
117
132
|
empty_data_dict = {}
|
|
118
133
|
for ds_name in external_routine.dataset_names:
|
|
119
134
|
if ds_name not in operands:
|
|
120
|
-
raise ValueError(
|
|
121
|
-
|
|
135
|
+
raise ValueError(
|
|
136
|
+
f"External Routine dataset {ds_name} " f"is not present in Eval operands"
|
|
137
|
+
)
|
|
122
138
|
empty_data = pd.DataFrame(
|
|
123
|
-
columns=[comp.name for comp in operands[ds_name].components.values()]
|
|
139
|
+
columns=[comp.name for comp in operands[ds_name].components.values()]
|
|
140
|
+
)
|
|
124
141
|
empty_data_dict[ds_name] = empty_data
|
|
125
142
|
|
|
126
|
-
df = cls._execute_query(
|
|
127
|
-
|
|
143
|
+
df = cls._execute_query(
|
|
144
|
+
external_routine.query, external_routine.dataset_names, empty_data_dict
|
|
145
|
+
)
|
|
128
146
|
component_names = [name for name in df.columns]
|
|
129
147
|
for comp_name in component_names:
|
|
130
148
|
if comp_name not in output.components:
|
|
131
|
-
raise SemanticError(
|
|
132
|
-
|
|
149
|
+
raise SemanticError(
|
|
150
|
+
"1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=df.name
|
|
151
|
+
)
|
|
133
152
|
|
|
134
153
|
for comp_name in output.components:
|
|
135
154
|
if comp_name not in component_names:
|
|
@@ -140,15 +159,17 @@ class Eval(Unary):
|
|
|
140
159
|
return output
|
|
141
160
|
|
|
142
161
|
@classmethod
|
|
143
|
-
def evaluate(
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
162
|
+
def evaluate( # type: ignore[override]
|
|
163
|
+
cls,
|
|
164
|
+
operands: Dict[str, Dataset],
|
|
165
|
+
external_routine: ExternalRoutine,
|
|
166
|
+
output: Dataset,
|
|
167
|
+
) -> Dataset:
|
|
168
|
+
result: Dataset = cls.validate(operands, external_routine, output)
|
|
169
|
+
operands_data_dict = {ds_name: operands[ds_name].data for ds_name in operands}
|
|
170
|
+
result.data = cls._execute_query(
|
|
171
|
+
external_routine.query,
|
|
172
|
+
external_routine.dataset_names,
|
|
173
|
+
operands_data_dict, # type: ignore[arg-type]
|
|
174
|
+
)
|
|
154
175
|
return result
|