vtlengine 1.0-py3-none-any.whl → 1.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +153 -100
- vtlengine/API/__init__.py +109 -67
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +8 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/lexer.py +732 -142
- vtlengine/AST/Grammar/parser.py +2188 -826
- vtlengine/AST/Grammar/tokens.py +128 -128
- vtlengine/AST/VtlVisitor.py +7 -4
- vtlengine/AST/__init__.py +22 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +194 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +52 -27
- vtlengine/Exceptions/messages.py +134 -62
- vtlengine/Interpreter/__init__.py +781 -487
- vtlengine/Model/__init__.py +165 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +115 -59
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +149 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +89 -44
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +334 -216
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +195 -40
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +77 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +46 -37
- vtlengine-1.0.1.dist-info/METADATA +236 -0
- vtlengine-1.0.1.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
vtlengine/Operators/Join.py
CHANGED
(Removed lines ending in "…" were truncated in the source diff view.)

@@ -1,36 +1,40 @@
-import os
 from copy import copy
 from functools import reduce
-from typing import List, Dict
+from typing import List, Dict, Any, Optional
 
 from vtlengine.DataTypes import binary_implicit_promotion
 
 from vtlengine.AST import BinOp
 from vtlengine.Exceptions import SemanticError
 
-if os.environ.get("SPARK"):
-    import pyspark.pandas as pd
-else:
-    import pandas as pd
+# if os.environ.get("SPARK"):
+#     import pyspark.pandas as pd
+# else:
+#     import pandas as pd
+import pandas as pd
 
 from vtlengine.Model import Dataset, Component, Role
 from vtlengine.Operators import Operator, _id_type_promotion_join_keys
 
 
 class Join(Operator):
-    how …
-    reference_dataset …
+    how: str
+    reference_dataset: Dataset
 
     @classmethod
     def get_components_union(cls, datasets: List[Dataset]) -> List[Component]:
-        common = []
-        common.extend( …
+        common: List[Any] = []
+        common.extend(
+            copy(comp)
+            for dataset in datasets
+            for comp in dataset.components.values()
+            if comp not in common
+        )
         return common
 
     @classmethod
-    def get_components_intersection(cls, …
-        element_count = {}
+    def get_components_intersection(cls, operands: List[Any]) -> Any:
+        element_count: Dict[str, Any] = {}
         for operand in operands:
             operand_set = set(operand)
             for element in operand_set:

@@ -42,29 +46,44 @@ class Join(Operator):
         return result
 
     @classmethod
-    def merge_components( …
+    def merge_components(
+        cls, operands: Any, using: Optional[List[str]] = None
+    ) -> Dict[str, Component]:
         nullability = {}
         merged_components = {}
         using = using or []
-        common = cls.get_components_intersection( …
-        totally_common = list( …
+        common = cls.get_components_intersection([op.get_components_names() for op in operands])
+        totally_common = list(
+            reduce(
+                lambda x, y: x & set(y.get_components_names()),  # type: ignore[operator]
+                operands[1:],
+                set(operands[0].get_components_names()),
+            )
+        )
 
         for op in operands:
             for comp in op.components.values():
                 if comp.name in using:
-                    is_identifier = all( …
+                    is_identifier = all(
+                        operand.components[comp.name].role == Role.IDENTIFIER
+                        for operand in operands
+                        if comp.name in operand.get_components_names()
+                    )
+                    comp.role = (
+                        Role.IDENTIFIER
+                        if is_identifier
+                        else Role.MEASURE if comp.role == Role.IDENTIFIER else comp.role
+                    )
                 if comp.name not in nullability:
                     nullability[comp.name] = copy(comp.nullable)
                 if comp.role == Role.IDENTIFIER:
                     nullability[comp.name] = False
                 elif comp.name in totally_common:
                     nullability[comp.name] |= copy(comp.nullable)
-                elif cls.how == …
+                elif cls.how == "outer" or (
+                    cls.how == "left"
+                    and comp.name not in cls.reference_dataset.get_components_names()
+                ):
                     nullability[comp.name] = True
                 else:
                     nullability[comp.name] = copy(comp.nullable)
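The nullability bookkeeping above is the densest part of this hunk. As a rough illustration only (hypothetical names, not vtlengine's API), the rule it encodes could be sketched as:

    from typing import List

    def merged_nullable(role: str, nullable_flags: List[bool], how: str,
                        in_all_operands: bool, in_reference: bool) -> bool:
        # Identifiers are join keys and can never hold nulls.
        if role == "Identifier":
            return False
        # A component present in every operand is nullable if any operand allows nulls.
        if in_all_operands:
            return any(nullable_flags)
        # Outer joins (and left joins pulling a component the reference dataset
        # lacks) produce unmatched rows, which are filled with nulls.
        if how == "outer" or (how == "left" and not in_reference):
            return True
        # Otherwise the component keeps its own nullability.
        return nullable_flags[0]

    print(merged_nullable("Measure", [False], "left", False, False))  # True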
@@ -77,12 +96,12 @@ class Join(Operator):
             component.nullable = nullability[component_name]
 
             if component_name in common and component_name not in using:
-                if component.role != Role.IDENTIFIER or cls.how == …
-                    new_name = f…
+                if component.role != Role.IDENTIFIER or cls.how == "cross":
+                    new_name = f"{operand_name}#{component_name}"
                     if new_name in merged_components:
                         raise SemanticError("1-1-13-9", comp_name=new_name)
                     while new_name in common:
-                        new_name += …
+                        new_name += "_dup"
                     merged_components[new_name] = component
                     merged_components[new_name].name = new_name
                 else:

@@ -90,18 +109,21 @@ class Join(Operator):
             else:
                 if component_name in using and component_name in merged_components:
                     data_type = binary_implicit_promotion(
-                        merged_components[component_name].data_type, component.data_type)
+                        merged_components[component_name].data_type, component.data_type
+                    )
                     component.data_type = data_type
                 merged_components[component_name] = component
 
         return merged_components
 
     @classmethod
-    def generate_result_components( …
+    def generate_result_components(
+        cls, operands: List[Dataset], using: Optional[List[str]] = None
+    ) -> Dict[str, Component]:
         components = {}
         inter_identifiers = cls.get_components_intersection(
-            …
+            [op.get_identifiers_names() for op in operands]
+        )
 
         for op in operands:
             ids = op.get_identifiers_names()

@@ -112,7 +134,9 @@ class Join(Operator):
     @classmethod
     def evaluate(cls, operands: List[Dataset], using: List[str]) -> Dataset:
         result = cls.execute([copy(operand) for operand in operands], using)
-        if sorted(result.get_components_names()) != sorted( …
+        if result.data is not None and sorted(result.get_components_names()) != sorted(
+            result.data.columns.tolist()
+        ):
             missing = list(set(result.get_components_names()) - set(result.data.columns.tolist()))
             if len(missing) == 0:
                 missing.append("None")

@@ -128,31 +152,49 @@ class Join(Operator):
             return result
 
         common_measures = cls.get_components_intersection(
-            …
+            [op.get_measures_names() + op.get_attributes_names() for op in operands]
+        )
         for op in operands:
-            …
+            if op.data is not None:
+                for column in op.data.columns.tolist():
+                    if column in common_measures and column not in using:
+                        op.data = op.data.rename(columns={column: op.name + "#" + column})
         result.data = copy(cls.reference_dataset.data)
 
         join_keys = using if using else result.get_identifiers_names()
 
         for op in operands:
             if op is not cls.reference_dataset:
-                merge_join_keys = …
+                merge_join_keys = (
+                    [key for key in join_keys if key in op.data.columns.tolist()]
+                    if (op.data is not None)
+                    else []
+                )
                 if len(merge_join_keys) == 0:
                     raise SemanticError("1-1-13-14", name=op.name)
                 for join_key in merge_join_keys:
-                    _id_type_promotion_join_keys( …
+                    _id_type_promotion_join_keys(
+                        result.get_component(join_key),
+                        op.get_component(join_key),
+                        join_key,
+                        result.data,
+                        op.data,
+                    )
+                if op.data is not None and result.data is not None:
+                    result.data = pd.merge(
+                        result.data,
+                        op.data,
+                        how=cls.how,  # type: ignore[arg-type]
+                        on=merge_join_keys,
+                    )
+                else:
+                    result.data = pd.DataFrame()
+        if result.data is not None:
+            result.data.reset_index(drop=True, inplace=True)
         return result
 
     @classmethod
-    def validate(cls, operands: List[Dataset], using: List[str]) -> Dataset:
+    def validate(cls, operands: List[Dataset], using: Optional[List[str]]) -> Dataset:
         if len(operands) < 1 or sum([isinstance(op, Dataset) for op in operands]) < 1:
             raise Exception("Join operator requires at least 1 dataset")
         if not all([isinstance(op, Dataset) for op in operands]):

@@ -162,8 +204,11 @@ class Join(Operator):
         for op in operands:
             if len(op.get_identifiers()) == 0:
                 raise SemanticError("1-3-27", op=cls.op)
-        cls.reference_dataset = …
-            x.get_identifiers_names()))
+        cls.reference_dataset = (
+            max(operands, key=lambda x: len(x.get_identifiers_names()))
+            if cls.how not in ["cross", "left"]
+            else operands[0]
+        )
         cls.identifiers_validation(operands, using)
         components = cls.merge_components(operands, using)
         if len(set(components.keys())) != len(components):

@@ -172,7 +217,7 @@ class Join(Operator):
         return Dataset(name="result", components=components, data=None)
 
     @classmethod
-    def identifiers_validation(cls, operands: List[Dataset], using: List[str]) -> None:
+    def identifiers_validation(cls, operands: List[Dataset], using: Optional[List[str]]) -> None:
 
         # (Case A)
         info = {op.name: op.get_identifiers_names() for op in operands}

@@ -182,45 +227,60 @@ class Join(Operator):
 
         for op_name, identifiers in info.items():
             if op_name != cls.reference_dataset.name and not set(identifiers).issubset(
-                …
+                set(info[cls.reference_dataset.name])
+            ):
                 if using is None:
                     missing_components = list(
-                        set(identifiers) - set(info[cls.reference_dataset.name])
-                        …
+                        set(identifiers) - set(info[cls.reference_dataset.name])
+                    )
+                    raise SemanticError(
+                        "1-1-13-11",
+                        op=cls.op,
+                        dataset_reference=cls.reference_dataset.name,
+                        component=missing_components[0],
+                    )
         if using is None:
             return
 
         # (Case B1)
-        …
+        if cls.reference_dataset is not None:
+            for op_name, identifiers in info.items():
+                if op_name != cls.reference_dataset.name and not set(identifiers).issubset(using):
+                    raise SemanticError("1-1-13-4", op=cls.op, using_names=using, dataset=op_name)
+            reference_components = cls.reference_dataset.get_components_names()
+            if not set(using).issubset(reference_components):
+                raise SemanticError(
+                    "1-1-13-6",
+                    op=cls.op,
+                    using_components=using,
+                    reference=cls.reference_dataset.name,
+                )
+
+        for op_name, identifiers in info.items():
+            if not set(using).issubset(identifiers):
+                # (Case B2)
+                if not set(using).issubset(reference_components):
+                    raise SemanticError("1-1-13-5", op=cls.op, using_names=using)
+                else:
+                    for op in operands:
+                        if op is not cls.reference_dataset:
+                            for component in using:
+                                if component not in op.get_components_names():
+                                    raise SemanticError(
+                                        "1-1-1-10",
+                                        op=cls.op,
+                                        comp_name=component,
+                                        dataset_name=op.name,
+                                    )
 
 
 class InnerJoin(Join):
-    how = …
+    how = "inner"
 
     @classmethod
-    def generate_result_components( …
+    def generate_result_components(
+        cls, operands: List[Dataset], using: Optional[List[str]] = None
+    ) -> Dict[str, Component]:
 
         if using is None:
             return super().generate_result_components(operands, using)

@@ -228,57 +288,74 @@ class InnerJoin(Join):
         components = {}
         for op in operands:
             components.update(
-                {id: op.components[id] for id in using if id in op.get_measures_names()})
+                {id: op.components[id] for id in using if id in op.get_measures_names()}
+            )
         for op in operands:
             components.update({id: op.components[id] for id in op.get_identifiers_names()})
         return components
 
 
 class LeftJoin(Join):
-    how = …
+    how = "left"
 
 
 class FullJoin(Join):
-    how = …
+    how = "outer"
 
     @classmethod
-    def identifiers_validation( …
+    def identifiers_validation(
+        cls, operands: List[Dataset], using: Optional[List[str]] = None
+    ) -> None:
         if using is not None:
             raise SemanticError("1-1-13-8", op=cls.op)
         for op in operands:
             if op is cls.reference_dataset:
                 continue
             if len(op.get_identifiers_names()) != len(
-                …
+                cls.reference_dataset.get_identifiers_names()
+            ):
                 raise SemanticError("1-1-13-13", op=cls.op)
             if op.get_identifiers_names() != cls.reference_dataset.get_identifiers_names():
                 raise SemanticError("1-1-13-12", op=cls.op)
 
 
 class CrossJoin(Join):
-    how = …
+    how = "cross"
 
     @classmethod
-    def execute(cls, operands: List[Dataset], using=None) -> Dataset:
+    def execute(cls, operands: List[Dataset], using: Optional[List[str]] = None) -> Dataset:
         result = cls.validate(operands, using)
         if len(operands) == 1:
             result.data = operands[0].data
             return result
-        common = cls.get_components_intersection( …
+        common = cls.get_components_intersection([op.get_components_names() for op in operands])
 
         for op in operands:
+            if op.data is None:
+                op.data = pd.DataFrame(columns=op.get_components_names())
             if op is operands[0]:
                 result.data = op.data
             else:
-                result.data …
+                if result.data is not None:
+                    result.data = pd.merge(
+                        result.data, op.data, how=cls.how  # type: ignore[arg-type]
+                    )
+                if result.data is not None:
+                    result.data = result.data.rename(
+                        columns={
+                            column: op.name + "#" + column
+                            for column in result.data.columns.tolist()
+                            if column in common
+                        }
+                    )
+        if result.data is not None:
+            result.data.reset_index(drop=True, inplace=True)
         return result
 
     @classmethod
-    def identifiers_validation( …
+    def identifiers_validation(
+        cls, operands: List[Dataset], using: Optional[List[str]] = None
+    ) -> None:
         if using is not None:
             raise SemanticError("1-1-13-8", op=cls.op)
 

@@ -286,59 +363,81 @@ class CrossJoin(Join):
 class Apply(Operator):
 
     @classmethod
-    def evaluate(cls, dataset: Dataset, expression, op_map: …
+    def evaluate(cls, dataset: Dataset, expression: Any, op_map: Dict[str, Any]) -> Dataset:
         for child in expression:
             dataset = cls.execute(dataset, op_map[child.op], child.left.value, child.right.value)
         return dataset
 
     @classmethod
-    def execute(cls, dataset: Dataset, op, left: str, right: str) -> Dataset:
+    def execute(cls, dataset: Dataset, op: Any, left: str, right: str) -> Dataset:
         left_dataset = cls.create_dataset("left", left, dataset)
         right_dataset = cls.create_dataset("right", right, dataset)
         left_dataset, right_dataset = cls.get_common_components(left_dataset, right_dataset)
         return op.evaluate(left_dataset, right_dataset)
 
     @classmethod
-    def validate(cls, dataset: Dataset, child, op_map: …
+    def validate(cls, dataset: Dataset, child: Any, op_map: Dict[str, Any]) -> None:
         if not isinstance(child, BinOp):
             raise Exception(
-                f"Invalid expression {child} on apply operator. Only BinOp are accepted")
+                f"Invalid expression {child} on apply operator. Only BinOp are accepted"
+            )
         if child.op not in op_map:
             raise Exception(f"Operator {child.op} not implemented")
-        …
+        if hasattr(child.left, "value") and hasattr(child.right, "value"):
+            left_components = [
+                comp.name[len(child.left.value) + 1]
+                for comp in dataset.components.values()
+                if comp.name.startswith(child.left.value)
+            ]
+            right_components = [
+                comp.name[len(child.right.value) + 1]
+                for comp in dataset.components.values()
+                if comp.name.startswith(child.right.value)
+            ]
+            if len(set(left_components) & set(right_components)) == 0:
+                raise Exception(
+                    f"{child.left.value} and {child.right.value} "
+                    f"has not any match on dataset components"
+                )
 
     @classmethod
     def create_dataset(cls, name: str, prefix: str, dataset: Dataset) -> Dataset:
-        prefix += …
-        components = { …
+        prefix += "#"
+        components = {
+            component.name: component
+            for component in dataset.components.values()
+            if component.name.startswith(prefix) or component.role is Role.IDENTIFIER
+        }
+        data = dataset.data[list(components.keys())] if dataset.data is not None else pd.DataFrame()
 
         for component in components.values():
-            component.name = …
+            component.name = (
+                component.name[len(prefix) :]
+                if (component.name.startswith(prefix) and component.role is not Role.IDENTIFIER)
+                else component.name
+            )
         components = {component.name: component for component in components.values()}
-        data.rename( …
+        data.rename(
+            columns={
+                column: column[len(prefix) :]
+                for column in data.columns
+                if column.startswith(prefix)
+            },
+            inplace=True,
+        )
         return Dataset(name=name, components=components, data=data)
 
     @classmethod
-    def get_common_components( …
+    def get_common_components(
+        cls, left: Dataset, right: Dataset
+    ) -> (Dataset, Dataset):  # type: ignore[syntax]
         common = set(left.get_components_names()) & set(right.get_components_names())
-        left.components = { …
+        left.components = {
+            comp.name: comp for comp in left.components.values() if comp.name in common
+        }
+        right.components = {
+            comp.name: comp for comp in right.components.values() if comp.name in common
+        }
+        left.data = left.data[list(common)] if left.data is not None else pd.DataFrame()
+        right.data = right.data[list(common)] if right.data is not None else pd.DataFrame()
         return left, right