vtlengine 1.0-py3-none-any.whl → 1.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- vtlengine/API/_InternalApi.py +159 -102
- vtlengine/API/__init__.py +110 -68
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +402 -205
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +248 -104
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +24 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/Vtl.g4 +49 -20
- vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
- vtlengine/AST/Grammar/lexer.py +2012 -1312
- vtlengine/AST/Grammar/parser.py +7524 -4343
- vtlengine/AST/Grammar/tokens.py +140 -128
- vtlengine/AST/VtlVisitor.py +16 -5
- vtlengine/AST/__init__.py +41 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +196 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +96 -27
- vtlengine/Exceptions/messages.py +149 -69
- vtlengine/Interpreter/__init__.py +817 -497
- vtlengine/Model/__init__.py +172 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +167 -79
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +290 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +129 -46
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +467 -215
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +232 -41
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +79 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +48 -37
- vtlengine-1.0.2.dist-info/METADATA +245 -0
- vtlengine-1.0.2.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/LICENSE.md +0 -0
vtlengine/Operators/Analytic.py
CHANGED
@@ -1,4 +1,3 @@
-import os
 from copy import copy
 from typing import List, Optional
 
@@ -6,21 +5,33 @@ import duckdb
 
 from vtlengine.Exceptions import SemanticError
 
-if os.environ.get("SPARK"):
-    import pyspark.pandas as pd
-else:
-    import pandas as pd
+# if os.environ.get("SPARK"):
+#     import pyspark.pandas as pd
+# else:
+#     import pandas as pd
+import pandas as pd
 
 import vtlengine.Operators as Operator
 from vtlengine.AST import OrderBy, Windowing
-from vtlengine.AST.Grammar.tokens import
+from vtlengine.AST.Grammar.tokens import (
+    AVG,
+    COUNT,
+    FIRST_VALUE,
+    LAG,
+    LAST_VALUE,
+    LEAD,
+    MAX,
+    MEDIAN,
+    MIN,
+    RANK,
+    RATIO_TO_REPORT,
+    STDDEV_POP,
+    STDDEV_SAMP,
+    SUM,
+    VAR_POP,
+    VAR_SAMP,
+)
+from vtlengine.DataTypes import COMP_NAME_MAPPING, Integer, Number, unary_implicit_promotion
 from vtlengine.Model import Component, Dataset, Role
 
 
@@ -36,14 +47,19 @@ class Analytic(Operator.Unary):
    analyticfunc: Specify class method that returns a dataframe using the duckdb library.
    Evaluate: Ensures the type of data is the correct one to perform the Analytic operators.
    """
-
+
+    sql_op: Optional[str] = None
 
     @classmethod
-    def validate(
+    def validate(  # type: ignore[override]
+        cls,
+        operand: Dataset,
+        partitioning: List[str],
+        ordering: Optional[List[OrderBy]],
+        window: Optional[Windowing],
+        params: Optional[List[int]],
+        component_name: Optional[str] = None,
+    ) -> Dataset:
         if ordering is None:
             order_components = []
         else:
@@ -53,50 +69,84 @@ class Analytic(Operator.Unary):
 
         for comp_name in partitioning:
             if comp_name not in operand.components:
-                raise SemanticError(
+                raise SemanticError(
+                    "1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=operand.name
+                )
             if comp_name not in identifier_names:
-                raise SemanticError(
+                raise SemanticError(
+                    "1-1-3-2",
+                    op=cls.op,
+                    id_name=comp_name,
+                    id_type=operand.components[comp_name].role,
+                )
         for comp_name in order_components:
             if comp_name not in operand.components:
-                raise SemanticError(
-        if
+                raise SemanticError(
+                    "1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=operand.name
+                )
+        if component_name is not None:
+            if cls.type_to_check is not None:
+                unary_implicit_promotion(
+                    operand.components[component_name].data_type, cls.type_to_check
+                )
+            if cls.return_type is not None:
+                result_components[component_name] = Component(
+                    name=component_name,
+                    data_type=cls.return_type,
+                    role=operand.components[component_name].role,
+                    nullable=operand.components[component_name].nullable,
+                )
+            if cls.op == COUNT:
+                measure_name = COMP_NAME_MAPPING[cls.return_type]
+                result_components[measure_name] = Component(
+                    name=measure_name,
+                    data_type=cls.return_type,
+                    role=Role.MEASURE,
+                    nullable=operand.components[component_name].nullable,
+                )
+                if component_name in result_components:
+                    del result_components[component_name]
+        else:
+            measures = operand.get_measures()
+            if len(measures) == 0:
+                raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
+            if cls.type_to_check is not None:
+                for measure in measures:
+                    unary_implicit_promotion(measure.data_type, cls.type_to_check)
+            if cls.return_type is not None:
+                for measure in measures:
+                    new_measure = copy(measure)
+                    new_measure.data_type = cls.return_type
+                    result_components[measure.name] = new_measure
+            if cls.op == COUNT and len(measures) <= 1:
+                measure_name = COMP_NAME_MAPPING[cls.return_type]
+                nullable = False if len(measures) == 0 else measures[0].nullable
+                if len(measures) == 1:
+                    del result_components[measures[0].name]
+                result_components[measure_name] = Component(
+                    name=measure_name,
+                    data_type=cls.return_type,
+                    role=Role.MEASURE,
+                    nullable=nullable,
+                )
 
         return Dataset(name="result", components=result_components, data=None)
 
     @classmethod
-    def analyticfunc(
+    def analyticfunc(
+        cls,
+        df: pd.DataFrame,
+        partitioning: List[str],
+        identifier_names: List[str],
+        measure_names: List[str],
+        ordering: List[OrderBy],
+        window: Optional[Windowing],
+        params: Optional[List[int]] = None,
+    ) -> pd.DataFrame:
         """Annotation class
 
-        It is used to analyze the attributes specified bellow
+        It is used to analyze the attributes specified bellow
+        ensuring that the type of data is the correct one to perform
         the operation.
 
         Attributes:
@@ -110,18 +160,26 @@ class Analytic(Operator.Unary):
         window_str = ""
         if window is not None:
             mode = "ROWS" if window.type_ == "data" else "RANGE"
-            start_mode =
+            start_mode = (
+                window.start_mode
+                if window.start_mode != "current" and window.start != "CURRENT ROW"
+                else ""
+            )
+            stop_mode = (
+                window.stop_mode
+                if window.stop_mode != "current" and window.stop != "CURRENT ROW"
+                else ""
+            )
+            if isinstance(window.start, int) and window.start == -1:
+                window.start = "UNBOUNDED"
 
-            if stop_mode ==
-                window.stop =
+            if stop_mode == "" and window.stop == 0:
+                window.stop = "CURRENT ROW"
             window_str = f"{mode} BETWEEN {window.start} {start_mode} AND {window.stop} {stop_mode}"
 
         # Partitioning
         if len(partitioning) > 0:
-            partition = "PARTITION BY " +
+            partition = "PARTITION BY " + ", ".join(partitioning)
         else:
             partition = ""
 
@@ -143,7 +201,7 @@ class Analytic(Operator.Unary):
         elif cls.op == RATIO_TO_REPORT:
             measure_query = f"CAST({measure} AS REAL) / SUM(CAST({measure} AS REAL))"
         elif cls.op in [LAG, LEAD]:
-            measure_query = f"{cls.sql_op}({measure}, {','.join(map(str, params))})"
+            measure_query = f"{cls.sql_op}({measure}, {','.join(map(str, params or []))})"
         else:
             measure_query = f"{cls.sql_op}({measure})"
         if cls.op == COUNT and len(measure_names) == 1:
@@ -153,33 +211,47 @@ class Analytic(Operator.Unary):
             measure_queries.append(measure_query)
         if cls.op == COUNT and len(measure_names) == 0:
             measure_queries.append(
-                f"COUNT(*) {analytic_str} as {COMP_NAME_MAPPING[cls.return_type]}"
+                f"COUNT(*) {analytic_str} as {COMP_NAME_MAPPING[cls.return_type]}"
+            )
 
-        measures_sql =
-        identifiers_sql =
+        measures_sql = ", ".join(measure_queries)
+        identifiers_sql = ", ".join(identifier_names)
         query = f"SELECT {identifiers_sql} , {measures_sql} FROM df"
 
         if cls.op == COUNT:
             df[measure_names] = df[measure_names].fillna(-1)
-        if os.getenv("SPARK", False):
-            df = df.to_pandas()
+        # if os.getenv("SPARK", False):
+        #     df = df.to_pandas()
         return duckdb.query(query).to_df()
 
     @classmethod
-    def evaluate(
+    def evaluate(  # type: ignore[override]
+        cls,
+        operand: Dataset,
+        partitioning: List[str],
+        ordering: Optional[List[OrderBy]],
+        window: Optional[Windowing],
+        params: Optional[List[int]],
+        component_name: Optional[str] = None,
+    ) -> Dataset:
+        result = cls.validate(operand, partitioning, ordering, window, params, component_name)
+        df = operand.data.copy() if operand.data is not None else pd.DataFrame()
        identifier_names = operand.get_identifiers_names()
 
-
+        if component_name is not None:
+            measure_names = [component_name]
+        else:
+            measure_names = operand.get_measures_names()
+
+        result.data = cls.analyticfunc(
+            df=df,
+            partitioning=partitioning,
+            identifier_names=identifier_names,
+            measure_names=measure_names,
+            ordering=ordering or [],
+            window=window,
+            params=params,
+        )
         return result
 
 
@@ -187,6 +259,7 @@ class Max(Analytic):
     """
     Max operator
     """
+
     op = MAX
     sql_op = "MAX"
 
@@ -195,6 +268,7 @@ class Min(Analytic):
     """
     Min operator
     """
+
     op = MIN
     sql_op = "MIN"
 
@@ -203,6 +277,7 @@ class Sum(Analytic):
     """
     Sum operator
     """
+
     op = SUM
     type_to_check = Number
     return_type = Number
@@ -213,6 +288,7 @@ class Count(Analytic):
     """
     Count operator
     """
+
     op = COUNT
     type_to_check = None
     return_type = Integer
@@ -223,6 +299,7 @@ class Avg(Analytic):
     """
     Average operator
     """
+
     op = AVG
     type_to_check = Number
     return_type = Number
@@ -233,6 +310,7 @@ class Median(Analytic):
     """
     Median operator
     """
+
     op = MEDIAN
     type_to_check = Number
     return_type = Number
@@ -243,6 +321,7 @@ class PopulationStandardDeviation(Analytic):
     """
     Population deviation operator
     """
+
     op = STDDEV_POP
     type_to_check = Number
     return_type = Number
@@ -253,6 +332,7 @@ class SampleStandardDeviation(Analytic):
     """
     Sample standard deviation operator.
     """
+
     op = STDDEV_SAMP
     type_to_check = Number
     return_type = Number
@@ -263,6 +343,7 @@ class PopulationVariance(Analytic):
     """
     Variance operator
     """
+
     op = VAR_POP
     type_to_check = Number
     return_type = Number
@@ -273,6 +354,7 @@ class SampleVariance(Analytic):
     """
     Sample variance operator
     """
+
     op = VAR_SAMP
     type_to_check = Number
     return_type = Number
@@ -283,6 +365,7 @@ class FirstValue(Analytic):
     """
     First value operator
     """
+
     op = FIRST_VALUE
     sql_op = "FIRST"
 
@@ -291,6 +374,7 @@ class LastValue(Analytic):
     """
     Last value operator
     """
+
     op = LAST_VALUE
     sql_op = "LAST"
 
@@ -299,6 +383,7 @@ class Lag(Analytic):
     """
     Lag operator
     """
+
     op = LAG
     sql_op = "LAG"
 
@@ -307,6 +392,7 @@ class Lead(Analytic):
     """
     Lead operator
     """
+
     op = LEAD
     sql_op = "LEAD"
 
@@ -315,6 +401,7 @@ class Rank(Analytic):
     """
     Rank operator
     """
+
     op = RANK
     sql_op = "RANK"
     return_type = Integer
@@ -324,6 +411,7 @@ class RatioToReport(Analytic):
     """
     Ratio operator
     """
+
     op = RATIO_TO_REPORT
     type_to_check = Number
     return_type = Number
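The reworked analyticfunc keeps the same strategy as before: render a SQL window clause from the VTL partitioning, ordering, and windowing arguments, then let DuckDB execute it directly over the pandas DataFrame df. A minimal standalone sketch of that pattern, with illustrative column names and data rather than vtlengine's API:

# Illustrative sketch, not vtlengine's code: push a windowed analytic over a
# pandas DataFrame down to DuckDB, the query-building pattern analyticfunc uses.
import duckdb
import pandas as pd

df = pd.DataFrame({"Id_1": ["A", "A", "B", "B"], "Me_1": [1, 2, 3, 4]})

# DuckDB's replacement scan resolves "df" to the DataFrame in scope, so the
# frame can be referenced by its Python variable name inside the SQL text.
query = (
    "SELECT Id_1, "
    "SUM(Me_1) OVER (PARTITION BY Id_1 ORDER BY Me_1 "
    "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Me_1 "
    "FROM df"
)
print(duckdb.query(query).to_df())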
vtlengine/Operators/Assignment.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import Union
+from typing import Union, Any
 
 from vtlengine.Operators import Binary
 
@@ -11,12 +11,15 @@ ALL_MODEL_TYPES = Union[DataComponent, Dataset]
 class Assignment(Binary):
 
     @classmethod
-    def validate(cls, left_operand:
-        if
+    def validate(cls, left_operand: Any, right_operand: Any) -> ALL_MODEL_TYPES:
+        if (
+            isinstance(right_operand, DataComponent)
+            and right_operand.role.__str__() == "IDENTIFIER"
+        ):
             raise SemanticError("1-1-6-13", op=cls.op, comp_name=right_operand.name)
         right_operand.name = left_operand
         return right_operand
 
     @classmethod
-    def evaluate(cls, left_operand:
+    def evaluate(cls, left_operand: Any, right_operand: Any) -> ALL_MODEL_TYPES:
         return cls.validate(left_operand, right_operand)
vtlengine/Operators/Boolean.py
CHANGED
@@ -1,11 +1,8 @@
-if os.environ.get("SPARK", False):
-    import pyspark.pandas as pd
-else:
-    import pandas as pd
+# if os.environ.get("SPARK", False):
+#     import pyspark.pandas as pd
+# else:
+#     import pandas as pd
+import pandas as pd
 
 from typing import Optional, Any
 
@@ -22,22 +19,20 @@ class Unary(Operator.Unary):
 class Binary(Operator.Binary):
     type_to_check = Boolean
     return_type = Boolean
-    comp_op = None
+    comp_op: Any = None
 
     @classmethod
-    def apply_operation_series_scalar(cls, series:
-                                      series_left: bool) -> Any:
+    def apply_operation_series_scalar(cls, series: Any, scalar: Any, series_left: bool) -> Any:
         if series_left:
             return series.map(lambda x: cls.py_op(x, scalar))
         else:
             return series.map(lambda x: cls.py_op(scalar, x))
 
     @classmethod
-    def apply_operation_two_series(cls,
-                                   right_series.astype('bool[pyarrow]'))
+    def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
+        result = cls.comp_op(
+            left_series.astype("boolean"), right_series.astype("boolean")
+        )
         return result.replace({pd.NA: None}).astype(object)
 
     @classmethod
@@ -50,7 +45,7 @@ class And(Binary):
     comp_op = pd.Series.__and__
 
     @staticmethod
-    @numba.njit
+    # @numba.njit
     def py_op(x: Optional[bool], y: Optional[bool]) -> Optional[bool]:
         if (x is None and y == False) or (x == False and y is None):
             return False
@@ -58,9 +53,9 @@ class And(Binary):
             return None
         return x and y
 
-    @classmethod
-    def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
-        return x & y
+    # @classmethod
+    # def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
+    #     return x & y
 
 
 class Or(Binary):
@@ -68,7 +63,7 @@ class Or(Binary):
     comp_op = pd.Series.__or__
 
     @staticmethod
-    @numba.njit
+    # @numba.njit
     def py_op(x: Optional[bool], y: Optional[bool]) -> Optional[bool]:
         if (x is None and y == True) or (x == True and y is None):
             return True
@@ -76,9 +71,9 @@ class Or(Binary):
             return None
         return x or y
 
-    @classmethod
-    def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
-        return x | y
+    # @classmethod
+    # def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
+    #     return x | y
 
 
 class Xor(Binary):
@@ -91,23 +86,23 @@ class Xor(Binary):
             return None
         return (x and not y) or (not x and y)
 
-    @classmethod
-    def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
-        return x ^ y
+    # @classmethod
+    # def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
+    #     return x ^ y
 
 
 class Not(Unary):
     op = NOT
 
     @staticmethod
-    @numba.njit
+    # @numba.njit
     def py_op(x: Optional[bool]) -> Optional[bool]:
         return None if x is None else not x
 
-    @classmethod
-    def spark_op(cls, series: pd.Series) -> pd.Series:
-        return ~series
+    # @classmethod
+    # def spark_op(cls, series: pd.Series) -> pd.Series:
+    #     return ~series
 
     @classmethod
     def apply_operation_component(cls, series: Any) -> Any:
-        return series.map(lambda x: not x, na_action=
+        return series.map(lambda x: not x, na_action="ignore")