vtlengine 1.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +153 -100
- vtlengine/API/__init__.py +109 -67
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +8 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/lexer.py +732 -142
- vtlengine/AST/Grammar/parser.py +2188 -826
- vtlengine/AST/Grammar/tokens.py +128 -128
- vtlengine/AST/VtlVisitor.py +7 -4
- vtlengine/AST/__init__.py +22 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +194 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +52 -27
- vtlengine/Exceptions/messages.py +134 -62
- vtlengine/Interpreter/__init__.py +781 -487
- vtlengine/Model/__init__.py +165 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +115 -59
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +149 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +89 -44
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +334 -216
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +195 -40
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +77 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +46 -37
- vtlengine-1.0.1.dist-info/METADATA +236 -0
- vtlengine-1.0.1.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
vtlengine/Operators/Analytic.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import os
|
|
2
1
|
from copy import copy
|
|
3
2
|
from typing import List, Optional
|
|
4
3
|
|
|
@@ -6,21 +5,33 @@ import duckdb
|
|
|
6
5
|
|
|
7
6
|
from vtlengine.Exceptions import SemanticError
|
|
8
7
|
|
|
9
|
-
if os.environ.get("SPARK"):
|
|
10
|
-
|
|
11
|
-
else:
|
|
12
|
-
|
|
8
|
+
# if os.environ.get("SPARK"):
|
|
9
|
+
# import pyspark.pandas as pd
|
|
10
|
+
# else:
|
|
11
|
+
# import pandas as pd
|
|
12
|
+
import pandas as pd
|
|
13
13
|
|
|
14
14
|
import vtlengine.Operators as Operator
|
|
15
15
|
from vtlengine.AST import OrderBy, Windowing
|
|
16
|
-
from vtlengine.AST.Grammar.tokens import
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
16
|
+
from vtlengine.AST.Grammar.tokens import (
|
|
17
|
+
AVG,
|
|
18
|
+
COUNT,
|
|
19
|
+
FIRST_VALUE,
|
|
20
|
+
LAG,
|
|
21
|
+
LAST_VALUE,
|
|
22
|
+
LEAD,
|
|
23
|
+
MAX,
|
|
24
|
+
MEDIAN,
|
|
25
|
+
MIN,
|
|
26
|
+
RANK,
|
|
27
|
+
RATIO_TO_REPORT,
|
|
28
|
+
STDDEV_POP,
|
|
29
|
+
STDDEV_SAMP,
|
|
30
|
+
SUM,
|
|
31
|
+
VAR_POP,
|
|
32
|
+
VAR_SAMP,
|
|
33
|
+
)
|
|
34
|
+
from vtlengine.DataTypes import COMP_NAME_MAPPING, Integer, Number, unary_implicit_promotion
|
|
24
35
|
from vtlengine.Model import Component, Dataset, Role
|
|
25
36
|
|
|
26
37
|
|
|
@@ -36,14 +47,18 @@ class Analytic(Operator.Unary):
|
|
|
36
47
|
analyticfunc: Specify class method that returns a dataframe using the duckdb library.
|
|
37
48
|
Evaluate: Ensures the type of data is the correct one to perform the Analytic operators.
|
|
38
49
|
"""
|
|
39
|
-
|
|
50
|
+
|
|
51
|
+
sql_op: Optional[str] = None
|
|
40
52
|
|
|
41
53
|
@classmethod
|
|
42
|
-
def validate(
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
54
|
+
def validate( # type: ignore[override]
|
|
55
|
+
cls,
|
|
56
|
+
operand: Dataset,
|
|
57
|
+
partitioning: List[str],
|
|
58
|
+
ordering: Optional[List[OrderBy]],
|
|
59
|
+
window: Optional[Windowing],
|
|
60
|
+
params: Optional[List[int]],
|
|
61
|
+
) -> Dataset:
|
|
47
62
|
if ordering is None:
|
|
48
63
|
order_components = []
|
|
49
64
|
else:
|
|
@@ -53,15 +68,21 @@ class Analytic(Operator.Unary):
|
|
|
53
68
|
|
|
54
69
|
for comp_name in partitioning:
|
|
55
70
|
if comp_name not in operand.components:
|
|
56
|
-
raise SemanticError(
|
|
57
|
-
|
|
71
|
+
raise SemanticError(
|
|
72
|
+
"1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=operand.name
|
|
73
|
+
)
|
|
58
74
|
if comp_name not in identifier_names:
|
|
59
|
-
raise SemanticError(
|
|
60
|
-
|
|
75
|
+
raise SemanticError(
|
|
76
|
+
"1-1-3-2",
|
|
77
|
+
op=cls.op,
|
|
78
|
+
id_name=comp_name,
|
|
79
|
+
id_type=operand.components[comp_name].role,
|
|
80
|
+
)
|
|
61
81
|
for comp_name in order_components:
|
|
62
82
|
if comp_name not in operand.components:
|
|
63
|
-
raise SemanticError(
|
|
64
|
-
|
|
83
|
+
raise SemanticError(
|
|
84
|
+
"1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=operand.name
|
|
85
|
+
)
|
|
65
86
|
measures = operand.get_measures()
|
|
66
87
|
if measures is None:
|
|
67
88
|
raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
|
|
@@ -79,24 +100,26 @@ class Analytic(Operator.Unary):
|
|
|
79
100
|
if len(measures) == 1:
|
|
80
101
|
del result_components[measures[0].name]
|
|
81
102
|
result_components[measure_name] = Component(
|
|
82
|
-
name=measure_name,
|
|
83
|
-
data_type=cls.return_type,
|
|
84
|
-
role=Role.MEASURE,
|
|
85
|
-
nullable=nullable
|
|
103
|
+
name=measure_name, data_type=cls.return_type, role=Role.MEASURE, nullable=nullable
|
|
86
104
|
)
|
|
87
105
|
|
|
88
106
|
return Dataset(name="result", components=result_components, data=None)
|
|
89
107
|
|
|
90
108
|
@classmethod
|
|
91
|
-
def analyticfunc(
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
109
|
+
def analyticfunc(
|
|
110
|
+
cls,
|
|
111
|
+
df: pd.DataFrame,
|
|
112
|
+
partitioning: List[str],
|
|
113
|
+
identifier_names: List[str],
|
|
114
|
+
measure_names: List[str],
|
|
115
|
+
ordering: List[OrderBy],
|
|
116
|
+
window: Optional[Windowing],
|
|
117
|
+
params: Optional[List[int]] = None,
|
|
118
|
+
) -> pd.DataFrame:
|
|
97
119
|
"""Annotation class
|
|
98
120
|
|
|
99
|
-
It is used to analyze the attributes specified bellow
|
|
121
|
+
It is used to analyze the attributes specified bellow
|
|
122
|
+
ensuring that the type of data is the correct one to perform
|
|
100
123
|
the operation.
|
|
101
124
|
|
|
102
125
|
Attributes:
|
|
@@ -110,18 +133,26 @@ class Analytic(Operator.Unary):
|
|
|
110
133
|
window_str = ""
|
|
111
134
|
if window is not None:
|
|
112
135
|
mode = "ROWS" if window.type_ == "data" else "RANGE"
|
|
113
|
-
start_mode =
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
136
|
+
start_mode = (
|
|
137
|
+
window.start_mode
|
|
138
|
+
if window.start_mode != "current" and window.start != "CURRENT ROW"
|
|
139
|
+
else ""
|
|
140
|
+
)
|
|
141
|
+
stop_mode = (
|
|
142
|
+
window.stop_mode
|
|
143
|
+
if window.stop_mode != "current" and window.stop != "CURRENT ROW"
|
|
144
|
+
else ""
|
|
145
|
+
)
|
|
146
|
+
if isinstance(window.start, int) and window.start == -1:
|
|
147
|
+
window.start = "UNBOUNDED"
|
|
117
148
|
|
|
118
|
-
if stop_mode ==
|
|
119
|
-
window.stop =
|
|
149
|
+
if stop_mode == "" and window.stop == 0:
|
|
150
|
+
window.stop = "CURRENT ROW"
|
|
120
151
|
window_str = f"{mode} BETWEEN {window.start} {start_mode} AND {window.stop} {stop_mode}"
|
|
121
152
|
|
|
122
153
|
# Partitioning
|
|
123
154
|
if len(partitioning) > 0:
|
|
124
|
-
partition = "PARTITION BY " +
|
|
155
|
+
partition = "PARTITION BY " + ", ".join(partitioning)
|
|
125
156
|
else:
|
|
126
157
|
partition = ""
|
|
127
158
|
|
|
@@ -143,7 +174,7 @@ class Analytic(Operator.Unary):
|
|
|
143
174
|
elif cls.op == RATIO_TO_REPORT:
|
|
144
175
|
measure_query = f"CAST({measure} AS REAL) / SUM(CAST({measure} AS REAL))"
|
|
145
176
|
elif cls.op in [LAG, LEAD]:
|
|
146
|
-
measure_query = f"{cls.sql_op}({measure}, {','.join(map(str, params))})"
|
|
177
|
+
measure_query = f"{cls.sql_op}({measure}, {','.join(map(str, params or []))})"
|
|
147
178
|
else:
|
|
148
179
|
measure_query = f"{cls.sql_op}({measure})"
|
|
149
180
|
if cls.op == COUNT and len(measure_names) == 1:
|
|
@@ -153,33 +184,42 @@ class Analytic(Operator.Unary):
|
|
|
153
184
|
measure_queries.append(measure_query)
|
|
154
185
|
if cls.op == COUNT and len(measure_names) == 0:
|
|
155
186
|
measure_queries.append(
|
|
156
|
-
f"COUNT(*) {analytic_str} as {COMP_NAME_MAPPING[cls.return_type]}"
|
|
187
|
+
f"COUNT(*) {analytic_str} as {COMP_NAME_MAPPING[cls.return_type]}"
|
|
188
|
+
)
|
|
157
189
|
|
|
158
|
-
measures_sql =
|
|
159
|
-
identifiers_sql =
|
|
190
|
+
measures_sql = ", ".join(measure_queries)
|
|
191
|
+
identifiers_sql = ", ".join(identifier_names)
|
|
160
192
|
query = f"SELECT {identifiers_sql} , {measures_sql} FROM df"
|
|
161
193
|
|
|
162
194
|
if cls.op == COUNT:
|
|
163
195
|
df[measure_names] = df[measure_names].fillna(-1)
|
|
164
|
-
if os.getenv("SPARK", False):
|
|
165
|
-
|
|
196
|
+
# if os.getenv("SPARK", False):
|
|
197
|
+
# df = df.to_pandas()
|
|
166
198
|
return duckdb.query(query).to_df()
|
|
167
199
|
|
|
168
200
|
@classmethod
|
|
169
|
-
def evaluate(
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
201
|
+
def evaluate( # type: ignore[override]
|
|
202
|
+
cls,
|
|
203
|
+
operand: Dataset,
|
|
204
|
+
partitioning: List[str],
|
|
205
|
+
ordering: Optional[List[OrderBy]],
|
|
206
|
+
window: Optional[Windowing],
|
|
207
|
+
params: Optional[List[int]],
|
|
208
|
+
) -> Dataset:
|
|
174
209
|
result = cls.validate(operand, partitioning, ordering, window, params)
|
|
175
|
-
df = operand.data.copy()
|
|
210
|
+
df = operand.data.copy() if operand.data is not None else pd.DataFrame()
|
|
176
211
|
measure_names = operand.get_measures_names()
|
|
177
212
|
identifier_names = operand.get_identifiers_names()
|
|
178
213
|
|
|
179
|
-
result.data = cls.analyticfunc(
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
214
|
+
result.data = cls.analyticfunc(
|
|
215
|
+
df=df,
|
|
216
|
+
partitioning=partitioning,
|
|
217
|
+
identifier_names=identifier_names,
|
|
218
|
+
measure_names=measure_names,
|
|
219
|
+
ordering=ordering or [],
|
|
220
|
+
window=window,
|
|
221
|
+
params=params,
|
|
222
|
+
)
|
|
183
223
|
return result
|
|
184
224
|
|
|
185
225
|
|
|
@@ -187,6 +227,7 @@ class Max(Analytic):
|
|
|
187
227
|
"""
|
|
188
228
|
Max operator
|
|
189
229
|
"""
|
|
230
|
+
|
|
190
231
|
op = MAX
|
|
191
232
|
sql_op = "MAX"
|
|
192
233
|
|
|
@@ -195,6 +236,7 @@ class Min(Analytic):
|
|
|
195
236
|
"""
|
|
196
237
|
Min operator
|
|
197
238
|
"""
|
|
239
|
+
|
|
198
240
|
op = MIN
|
|
199
241
|
sql_op = "MIN"
|
|
200
242
|
|
|
@@ -203,6 +245,7 @@ class Sum(Analytic):
|
|
|
203
245
|
"""
|
|
204
246
|
Sum operator
|
|
205
247
|
"""
|
|
248
|
+
|
|
206
249
|
op = SUM
|
|
207
250
|
type_to_check = Number
|
|
208
251
|
return_type = Number
|
|
@@ -213,6 +256,7 @@ class Count(Analytic):
|
|
|
213
256
|
"""
|
|
214
257
|
Count operator
|
|
215
258
|
"""
|
|
259
|
+
|
|
216
260
|
op = COUNT
|
|
217
261
|
type_to_check = None
|
|
218
262
|
return_type = Integer
|
|
@@ -223,6 +267,7 @@ class Avg(Analytic):
|
|
|
223
267
|
"""
|
|
224
268
|
Average operator
|
|
225
269
|
"""
|
|
270
|
+
|
|
226
271
|
op = AVG
|
|
227
272
|
type_to_check = Number
|
|
228
273
|
return_type = Number
|
|
@@ -233,6 +278,7 @@ class Median(Analytic):
|
|
|
233
278
|
"""
|
|
234
279
|
Median operator
|
|
235
280
|
"""
|
|
281
|
+
|
|
236
282
|
op = MEDIAN
|
|
237
283
|
type_to_check = Number
|
|
238
284
|
return_type = Number
|
|
@@ -243,6 +289,7 @@ class PopulationStandardDeviation(Analytic):
|
|
|
243
289
|
"""
|
|
244
290
|
Population deviation operator
|
|
245
291
|
"""
|
|
292
|
+
|
|
246
293
|
op = STDDEV_POP
|
|
247
294
|
type_to_check = Number
|
|
248
295
|
return_type = Number
|
|
@@ -253,6 +300,7 @@ class SampleStandardDeviation(Analytic):
|
|
|
253
300
|
"""
|
|
254
301
|
Sample standard deviation operator.
|
|
255
302
|
"""
|
|
303
|
+
|
|
256
304
|
op = STDDEV_SAMP
|
|
257
305
|
type_to_check = Number
|
|
258
306
|
return_type = Number
|
|
@@ -263,6 +311,7 @@ class PopulationVariance(Analytic):
|
|
|
263
311
|
"""
|
|
264
312
|
Variance operator
|
|
265
313
|
"""
|
|
314
|
+
|
|
266
315
|
op = VAR_POP
|
|
267
316
|
type_to_check = Number
|
|
268
317
|
return_type = Number
|
|
@@ -273,6 +322,7 @@ class SampleVariance(Analytic):
|
|
|
273
322
|
"""
|
|
274
323
|
Sample variance operator
|
|
275
324
|
"""
|
|
325
|
+
|
|
276
326
|
op = VAR_SAMP
|
|
277
327
|
type_to_check = Number
|
|
278
328
|
return_type = Number
|
|
@@ -283,6 +333,7 @@ class FirstValue(Analytic):
|
|
|
283
333
|
"""
|
|
284
334
|
First value operator
|
|
285
335
|
"""
|
|
336
|
+
|
|
286
337
|
op = FIRST_VALUE
|
|
287
338
|
sql_op = "FIRST"
|
|
288
339
|
|
|
@@ -291,6 +342,7 @@ class LastValue(Analytic):
|
|
|
291
342
|
"""
|
|
292
343
|
Last value operator
|
|
293
344
|
"""
|
|
345
|
+
|
|
294
346
|
op = LAST_VALUE
|
|
295
347
|
sql_op = "LAST"
|
|
296
348
|
|
|
@@ -299,6 +351,7 @@ class Lag(Analytic):
|
|
|
299
351
|
"""
|
|
300
352
|
Lag operator
|
|
301
353
|
"""
|
|
354
|
+
|
|
302
355
|
op = LAG
|
|
303
356
|
sql_op = "LAG"
|
|
304
357
|
|
|
@@ -307,6 +360,7 @@ class Lead(Analytic):
|
|
|
307
360
|
"""
|
|
308
361
|
Lead operator
|
|
309
362
|
"""
|
|
363
|
+
|
|
310
364
|
op = LEAD
|
|
311
365
|
sql_op = "LEAD"
|
|
312
366
|
|
|
@@ -315,6 +369,7 @@ class Rank(Analytic):
|
|
|
315
369
|
"""
|
|
316
370
|
Rank operator
|
|
317
371
|
"""
|
|
372
|
+
|
|
318
373
|
op = RANK
|
|
319
374
|
sql_op = "RANK"
|
|
320
375
|
return_type = Integer
|
|
@@ -324,6 +379,7 @@ class RatioToReport(Analytic):
|
|
|
324
379
|
"""
|
|
325
380
|
Ratio operator
|
|
326
381
|
"""
|
|
382
|
+
|
|
327
383
|
op = RATIO_TO_REPORT
|
|
328
384
|
type_to_check = Number
|
|
329
385
|
return_type = Number
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Union
|
|
1
|
+
from typing import Union, Any
|
|
2
2
|
|
|
3
3
|
from vtlengine.Operators import Binary
|
|
4
4
|
|
|
@@ -11,12 +11,15 @@ ALL_MODEL_TYPES = Union[DataComponent, Dataset]
|
|
|
11
11
|
class Assignment(Binary):
|
|
12
12
|
|
|
13
13
|
@classmethod
|
|
14
|
-
def validate(cls, left_operand:
|
|
15
|
-
if
|
|
14
|
+
def validate(cls, left_operand: Any, right_operand: Any) -> ALL_MODEL_TYPES:
|
|
15
|
+
if (
|
|
16
|
+
isinstance(right_operand, DataComponent)
|
|
17
|
+
and right_operand.role.__str__() == "IDENTIFIER"
|
|
18
|
+
):
|
|
16
19
|
raise SemanticError("1-1-6-13", op=cls.op, comp_name=right_operand.name)
|
|
17
20
|
right_operand.name = left_operand
|
|
18
21
|
return right_operand
|
|
19
22
|
|
|
20
23
|
@classmethod
|
|
21
|
-
def evaluate(cls, left_operand:
|
|
24
|
+
def evaluate(cls, left_operand: Any, right_operand: Any) -> ALL_MODEL_TYPES:
|
|
22
25
|
return cls.validate(left_operand, right_operand)
|
vtlengine/Operators/Boolean.py
CHANGED
|
@@ -1,11 +1,8 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
import pyspark.pandas as pd
|
|
7
|
-
else:
|
|
8
|
-
import pandas as pd
|
|
1
|
+
# if os.environ.get("SPARK", False):
|
|
2
|
+
# import pyspark.pandas as pd
|
|
3
|
+
# else:
|
|
4
|
+
# import pandas as pd
|
|
5
|
+
import pandas as pd
|
|
9
6
|
|
|
10
7
|
from typing import Optional, Any
|
|
11
8
|
|
|
@@ -22,22 +19,20 @@ class Unary(Operator.Unary):
|
|
|
22
19
|
class Binary(Operator.Binary):
|
|
23
20
|
type_to_check = Boolean
|
|
24
21
|
return_type = Boolean
|
|
25
|
-
comp_op = None
|
|
22
|
+
comp_op: Any = None
|
|
26
23
|
|
|
27
24
|
@classmethod
|
|
28
|
-
def apply_operation_series_scalar(cls, series:
|
|
29
|
-
series_left: bool) -> Any:
|
|
25
|
+
def apply_operation_series_scalar(cls, series: Any, scalar: Any, series_left: bool) -> Any:
|
|
30
26
|
if series_left:
|
|
31
27
|
return series.map(lambda x: cls.py_op(x, scalar))
|
|
32
28
|
else:
|
|
33
29
|
return series.map(lambda x: cls.py_op(scalar, x))
|
|
34
30
|
|
|
35
31
|
@classmethod
|
|
36
|
-
def apply_operation_two_series(cls,
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
right_series.astype('bool[pyarrow]'))
|
|
32
|
+
def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
|
|
33
|
+
result = cls.comp_op(
|
|
34
|
+
left_series.astype("bool[pyarrow]"), right_series.astype("bool[pyarrow]")
|
|
35
|
+
)
|
|
41
36
|
return result.replace({pd.NA: None}).astype(object)
|
|
42
37
|
|
|
43
38
|
@classmethod
|
|
@@ -50,7 +45,7 @@ class And(Binary):
|
|
|
50
45
|
comp_op = pd.Series.__and__
|
|
51
46
|
|
|
52
47
|
@staticmethod
|
|
53
|
-
@numba.njit
|
|
48
|
+
# @numba.njit
|
|
54
49
|
def py_op(x: Optional[bool], y: Optional[bool]) -> Optional[bool]:
|
|
55
50
|
if (x is None and y == False) or (x == False and y is None):
|
|
56
51
|
return False
|
|
@@ -58,9 +53,9 @@ class And(Binary):
|
|
|
58
53
|
return None
|
|
59
54
|
return x and y
|
|
60
55
|
|
|
61
|
-
@classmethod
|
|
62
|
-
def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
|
|
63
|
-
|
|
56
|
+
# @classmethod
|
|
57
|
+
# def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
|
|
58
|
+
# return x & y
|
|
64
59
|
|
|
65
60
|
|
|
66
61
|
class Or(Binary):
|
|
@@ -68,7 +63,7 @@ class Or(Binary):
|
|
|
68
63
|
comp_op = pd.Series.__or__
|
|
69
64
|
|
|
70
65
|
@staticmethod
|
|
71
|
-
@numba.njit
|
|
66
|
+
# @numba.njit
|
|
72
67
|
def py_op(x: Optional[bool], y: Optional[bool]) -> Optional[bool]:
|
|
73
68
|
if (x is None and y == True) or (x == True and y is None):
|
|
74
69
|
return True
|
|
@@ -76,9 +71,9 @@ class Or(Binary):
|
|
|
76
71
|
return None
|
|
77
72
|
return x or y
|
|
78
73
|
|
|
79
|
-
@classmethod
|
|
80
|
-
def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
|
|
81
|
-
|
|
74
|
+
# @classmethod
|
|
75
|
+
# def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
|
|
76
|
+
# return x | y
|
|
82
77
|
|
|
83
78
|
|
|
84
79
|
class Xor(Binary):
|
|
@@ -91,23 +86,23 @@ class Xor(Binary):
|
|
|
91
86
|
return None
|
|
92
87
|
return (x and not y) or (not x and y)
|
|
93
88
|
|
|
94
|
-
@classmethod
|
|
95
|
-
def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
|
|
96
|
-
|
|
89
|
+
# @classmethod
|
|
90
|
+
# def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
|
|
91
|
+
# return x ^ y
|
|
97
92
|
|
|
98
93
|
|
|
99
94
|
class Not(Unary):
|
|
100
95
|
op = NOT
|
|
101
96
|
|
|
102
97
|
@staticmethod
|
|
103
|
-
@numba.njit
|
|
98
|
+
# @numba.njit
|
|
104
99
|
def py_op(x: Optional[bool]) -> Optional[bool]:
|
|
105
100
|
return None if x is None else not x
|
|
106
101
|
|
|
107
|
-
@classmethod
|
|
108
|
-
def spark_op(cls, series: pd.Series) -> pd.Series:
|
|
109
|
-
|
|
102
|
+
# @classmethod
|
|
103
|
+
# def spark_op(cls, series: pd.Series) -> pd.Series:
|
|
104
|
+
# return ~series
|
|
110
105
|
|
|
111
106
|
@classmethod
|
|
112
107
|
def apply_operation_component(cls, series: Any) -> Any:
|
|
113
|
-
return series.map(lambda x: not x, na_action=
|
|
108
|
+
return series.map(lambda x: not x, na_action="ignore")
|