vtlengine 1.0.3rc3__py3-none-any.whl → 1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +288 -61
- vtlengine/API/__init__.py +269 -71
- vtlengine/API/data/schema/json_schema_2.1.json +116 -0
- vtlengine/AST/ASTComment.py +56 -0
- vtlengine/AST/ASTConstructor.py +76 -22
- vtlengine/AST/ASTConstructorModules/Expr.py +238 -120
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +126 -61
- vtlengine/AST/ASTConstructorModules/Terminals.py +97 -42
- vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- vtlengine/AST/ASTEncoders.py +5 -1
- vtlengine/AST/ASTString.py +608 -0
- vtlengine/AST/ASTTemplate.py +28 -2
- vtlengine/AST/DAG/__init__.py +10 -4
- vtlengine/AST/Grammar/lexer.py +0 -1
- vtlengine/AST/Grammar/parser.py +185 -440
- vtlengine/AST/VtlVisitor.py +0 -1
- vtlengine/AST/__init__.py +127 -14
- vtlengine/DataTypes/TimeHandling.py +50 -15
- vtlengine/DataTypes/__init__.py +79 -7
- vtlengine/Exceptions/__init__.py +3 -5
- vtlengine/Exceptions/messages.py +74 -105
- vtlengine/Interpreter/__init__.py +136 -46
- vtlengine/Model/__init__.py +14 -11
- vtlengine/Operators/Aggregation.py +17 -9
- vtlengine/Operators/Analytic.py +64 -20
- vtlengine/Operators/Assignment.py +0 -1
- vtlengine/Operators/CastOperator.py +44 -44
- vtlengine/Operators/Clause.py +16 -10
- vtlengine/Operators/Comparison.py +20 -12
- vtlengine/Operators/Conditional.py +47 -15
- vtlengine/Operators/General.py +9 -4
- vtlengine/Operators/HROperators.py +4 -14
- vtlengine/Operators/Join.py +15 -14
- vtlengine/Operators/Numeric.py +32 -26
- vtlengine/Operators/RoleSetter.py +6 -2
- vtlengine/Operators/Set.py +12 -8
- vtlengine/Operators/String.py +9 -9
- vtlengine/Operators/Time.py +145 -124
- vtlengine/Operators/Validation.py +10 -4
- vtlengine/Operators/__init__.py +56 -69
- vtlengine/Utils/__init__.py +55 -1
- vtlengine/__extras_check.py +17 -0
- vtlengine/__init__.py +2 -2
- vtlengine/files/output/__init__.py +2 -1
- vtlengine/files/output/_time_period_representation.py +2 -1
- vtlengine/files/parser/__init__.py +52 -46
- vtlengine/files/parser/_time_checking.py +4 -4
- {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1.dist-info}/METADATA +21 -17
- vtlengine-1.1.dist-info/RECORD +61 -0
- {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1.dist-info}/WHEEL +1 -1
- vtlengine/DataTypes/NumericTypesHandling.py +0 -38
- vtlengine-1.0.3rc3.dist-info/RECORD +0 -58
- {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1.dist-info}/LICENSE.md +0 -0
vtlengine/Operators/Analytic.py
CHANGED
|
@@ -29,10 +29,17 @@ from vtlengine.AST.Grammar.tokens import (
|
|
|
29
29
|
VAR_POP,
|
|
30
30
|
VAR_SAMP,
|
|
31
31
|
)
|
|
32
|
-
from vtlengine.DataTypes import
|
|
32
|
+
from vtlengine.DataTypes import (
|
|
33
|
+
COMP_NAME_MAPPING,
|
|
34
|
+
Integer,
|
|
35
|
+
Number,
|
|
36
|
+
unary_implicit_promotion,
|
|
37
|
+
)
|
|
33
38
|
from vtlengine.Exceptions import SemanticError
|
|
34
39
|
from vtlengine.Model import Component, Dataset, Role
|
|
35
40
|
|
|
41
|
+
return_integer_operators = [MAX, MIN, SUM]
|
|
42
|
+
|
|
36
43
|
|
|
37
44
|
# noinspection PyMethodOverriding
|
|
38
45
|
class Analytic(Operator.Unary):
|
|
@@ -47,10 +54,11 @@ class Analytic(Operator.Unary):
|
|
|
47
54
|
Evaluate: Ensures the type of data is the correct one to perform the Analytic operators.
|
|
48
55
|
"""
|
|
49
56
|
|
|
57
|
+
return_integer = None
|
|
50
58
|
sql_op: Optional[str] = None
|
|
51
59
|
|
|
52
60
|
@classmethod
|
|
53
|
-
def validate( # type: ignore[override]
|
|
61
|
+
def validate( # type: ignore[override] # noqa: C901
|
|
54
62
|
cls,
|
|
55
63
|
operand: Dataset,
|
|
56
64
|
partitioning: List[str],
|
|
@@ -66,7 +74,10 @@ class Analytic(Operator.Unary):
|
|
|
66
74
|
for comp_name in partitioning:
|
|
67
75
|
if comp_name not in operand.components:
|
|
68
76
|
raise SemanticError(
|
|
69
|
-
"1-1-1-10",
|
|
77
|
+
"1-1-1-10",
|
|
78
|
+
op=cls.op,
|
|
79
|
+
comp_name=comp_name,
|
|
80
|
+
dataset_name=operand.name,
|
|
70
81
|
)
|
|
71
82
|
if comp_name not in identifier_names:
|
|
72
83
|
raise SemanticError(
|
|
@@ -78,14 +89,21 @@ class Analytic(Operator.Unary):
|
|
|
78
89
|
for comp_name in order_components:
|
|
79
90
|
if comp_name not in operand.components:
|
|
80
91
|
raise SemanticError(
|
|
81
|
-
"1-1-1-10",
|
|
92
|
+
"1-1-1-10",
|
|
93
|
+
op=cls.op,
|
|
94
|
+
comp_name=comp_name,
|
|
95
|
+
dataset_name=operand.name,
|
|
82
96
|
)
|
|
83
97
|
if component_name is not None:
|
|
84
98
|
if cls.type_to_check is not None:
|
|
85
99
|
unary_implicit_promotion(
|
|
86
100
|
operand.components[component_name].data_type, cls.type_to_check
|
|
87
101
|
)
|
|
88
|
-
|
|
102
|
+
|
|
103
|
+
if cls.op in return_integer_operators:
|
|
104
|
+
cls.return_integer = isinstance(cls.return_type, Integer)
|
|
105
|
+
|
|
106
|
+
elif cls.return_type is not None:
|
|
89
107
|
result_components[component_name] = Component(
|
|
90
108
|
name=component_name,
|
|
91
109
|
data_type=cls.return_type,
|
|
@@ -106,14 +124,28 @@ class Analytic(Operator.Unary):
|
|
|
106
124
|
measures = operand.get_measures()
|
|
107
125
|
if len(measures) == 0:
|
|
108
126
|
raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
|
|
127
|
+
|
|
128
|
+
if cls.op in return_integer_operators:
|
|
129
|
+
isNumber = False
|
|
130
|
+
for measure in measures:
|
|
131
|
+
isNumber |= isinstance(measure.data_type, Number)
|
|
132
|
+
cls.return_integer = not isNumber
|
|
133
|
+
|
|
109
134
|
if cls.type_to_check is not None:
|
|
110
135
|
for measure in measures:
|
|
111
136
|
unary_implicit_promotion(measure.data_type, cls.type_to_check)
|
|
112
|
-
|
|
137
|
+
|
|
138
|
+
if cls.op in return_integer_operators:
|
|
139
|
+
for measure in measures:
|
|
140
|
+
new_measure = copy(measure)
|
|
141
|
+
new_measure.data_type = Integer if cls.return_integer else Number
|
|
142
|
+
result_components[measure.name] = new_measure
|
|
143
|
+
elif cls.return_type is not None:
|
|
113
144
|
for measure in measures:
|
|
114
145
|
new_measure = copy(measure)
|
|
115
146
|
new_measure.data_type = cls.return_type
|
|
116
147
|
result_components[measure.name] = new_measure
|
|
148
|
+
|
|
117
149
|
if cls.op == COUNT and len(measures) <= 1:
|
|
118
150
|
measure_name = COMP_NAME_MAPPING[cls.return_type]
|
|
119
151
|
nullable = False if len(measures) == 0 else measures[0].nullable
|
|
@@ -157,21 +189,26 @@ class Analytic(Operator.Unary):
|
|
|
157
189
|
if window is not None:
|
|
158
190
|
mode = "ROWS" if window.type_ == "data" else "RANGE"
|
|
159
191
|
start_mode = (
|
|
160
|
-
window.start_mode
|
|
161
|
-
if window.
|
|
192
|
+
window.start_mode.upper()
|
|
193
|
+
if (isinstance(window.start, int) and window.start != 0)
|
|
194
|
+
or (isinstance(window.start, str) and window.start == "unbounded")
|
|
162
195
|
else ""
|
|
163
196
|
)
|
|
164
197
|
stop_mode = (
|
|
165
|
-
window.stop_mode
|
|
166
|
-
if window.
|
|
198
|
+
window.stop_mode.upper()
|
|
199
|
+
if (isinstance(window.stop, int) and window.stop != 0)
|
|
200
|
+
or (isinstance(window.stop, str) and window.stop == "unbounded")
|
|
167
201
|
else ""
|
|
168
202
|
)
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
203
|
+
start = (
|
|
204
|
+
"UNBOUNDED"
|
|
205
|
+
if window.start == "unbounded" or window.start == -1
|
|
206
|
+
else str(window.start)
|
|
207
|
+
)
|
|
208
|
+
stop = (
|
|
209
|
+
"CURRENT ROW" if window.stop == "current" or window.stop == 0 else str(window.stop)
|
|
210
|
+
)
|
|
211
|
+
window_str = f"{mode} BETWEEN {start} {start_mode} AND {stop} {stop_mode}"
|
|
175
212
|
|
|
176
213
|
# Partitioning
|
|
177
214
|
partition = "PARTITION BY " + ", ".join(partitioning) if len(partitioning) > 0 else ""
|
|
@@ -192,13 +229,15 @@ class Analytic(Operator.Unary):
|
|
|
192
229
|
if cls.op == RANK:
|
|
193
230
|
measure_query = f"{cls.sql_op}()"
|
|
194
231
|
elif cls.op == RATIO_TO_REPORT:
|
|
195
|
-
measure_query = f"CAST({measure} AS
|
|
232
|
+
measure_query = f"CAST({measure} AS DOUBLE) / SUM(CAST({measure} AS DOUBLE))"
|
|
196
233
|
elif cls.op in [LAG, LEAD]:
|
|
197
234
|
measure_query = f"{cls.sql_op}({measure}, {','.join(map(str, params or []))})"
|
|
198
235
|
else:
|
|
199
236
|
measure_query = f"{cls.sql_op}({measure})"
|
|
200
237
|
if cls.op == COUNT and len(measure_names) == 1:
|
|
201
238
|
measure_query += f" {analytic_str} as {COMP_NAME_MAPPING[cls.return_type]}"
|
|
239
|
+
elif cls.op in return_integer_operators and cls.return_integer:
|
|
240
|
+
measure_query = f"CAST({measure_query} {analytic_str} AS INTEGER) as {measure}"
|
|
202
241
|
else:
|
|
203
242
|
measure_query += f" {analytic_str} as {measure}"
|
|
204
243
|
measure_queries.append(measure_query)
|
|
@@ -215,7 +254,7 @@ class Analytic(Operator.Unary):
|
|
|
215
254
|
df[measure_names] = df[measure_names].fillna(-1)
|
|
216
255
|
# if os.getenv("SPARK", False):
|
|
217
256
|
# df = df.to_pandas()
|
|
218
|
-
return duckdb.query(query).to_df()
|
|
257
|
+
return duckdb.query(query).to_df().astype(object)
|
|
219
258
|
|
|
220
259
|
@classmethod
|
|
221
260
|
def evaluate( # type: ignore[override]
|
|
@@ -245,6 +284,10 @@ class Analytic(Operator.Unary):
|
|
|
245
284
|
window=window,
|
|
246
285
|
params=params,
|
|
247
286
|
)
|
|
287
|
+
|
|
288
|
+
# if cls.return_type == Integer:
|
|
289
|
+
# result.data[measure_names] = result.data[measure_names].astype('Int64')
|
|
290
|
+
|
|
248
291
|
return result
|
|
249
292
|
|
|
250
293
|
|
|
@@ -255,6 +298,7 @@ class Max(Analytic):
|
|
|
255
298
|
|
|
256
299
|
op = MAX
|
|
257
300
|
sql_op = "MAX"
|
|
301
|
+
return_integer = False
|
|
258
302
|
|
|
259
303
|
|
|
260
304
|
class Min(Analytic):
|
|
@@ -264,6 +308,7 @@ class Min(Analytic):
|
|
|
264
308
|
|
|
265
309
|
op = MIN
|
|
266
310
|
sql_op = "MIN"
|
|
311
|
+
return_integer = False
|
|
267
312
|
|
|
268
313
|
|
|
269
314
|
class Sum(Analytic):
|
|
@@ -272,9 +317,8 @@ class Sum(Analytic):
|
|
|
272
317
|
"""
|
|
273
318
|
|
|
274
319
|
op = SUM
|
|
275
|
-
type_to_check = Number
|
|
276
|
-
return_type = Number
|
|
277
320
|
sql_op = "SUM"
|
|
321
|
+
return_integer = False
|
|
278
322
|
|
|
279
323
|
|
|
280
324
|
class Count(Analytic):
|
|
@@ -34,43 +34,43 @@ class Cast(Operator.Unary):
|
|
|
34
34
|
# CASTS VALUES
|
|
35
35
|
# Converts the value from one type to another in a way that is according to the mask
|
|
36
36
|
@classmethod
|
|
37
|
-
def cast_string_to_number(cls,
|
|
37
|
+
def cast_string_to_number(cls, value: Any, mask: str) -> Any:
|
|
38
38
|
"""
|
|
39
39
|
This method casts a string to a number, according to the mask.
|
|
40
40
|
|
|
41
41
|
"""
|
|
42
42
|
|
|
43
|
-
raise NotImplementedError("How this
|
|
43
|
+
raise NotImplementedError("How this mask should be implemented is not yet defined.")
|
|
44
44
|
|
|
45
45
|
@classmethod
|
|
46
|
-
def cast_string_to_date(cls,
|
|
46
|
+
def cast_string_to_date(cls, value: Any, mask: str) -> Any:
|
|
47
47
|
"""
|
|
48
48
|
This method casts a string to a number, according to the mask.
|
|
49
49
|
|
|
50
50
|
"""
|
|
51
51
|
|
|
52
|
-
raise NotImplementedError("How this
|
|
52
|
+
raise NotImplementedError("How this mask should be implemented is not yet defined.")
|
|
53
53
|
|
|
54
54
|
@classmethod
|
|
55
|
-
def cast_string_to_duration(cls,
|
|
55
|
+
def cast_string_to_duration(cls, value: Any, mask: str) -> Any:
|
|
56
56
|
"""
|
|
57
57
|
This method casts a string to a duration, according to the mask.
|
|
58
58
|
|
|
59
59
|
"""
|
|
60
60
|
|
|
61
|
-
raise NotImplementedError("How this
|
|
61
|
+
raise NotImplementedError("How this mask should be implemented is not yet defined.")
|
|
62
62
|
|
|
63
63
|
@classmethod
|
|
64
|
-
def cast_string_to_time_period(cls,
|
|
64
|
+
def cast_string_to_time_period(cls, value: Any, mask: str) -> Any:
|
|
65
65
|
"""
|
|
66
66
|
This method casts a string to a time period, according to the mask.
|
|
67
67
|
|
|
68
68
|
"""
|
|
69
69
|
|
|
70
|
-
raise NotImplementedError("How this
|
|
70
|
+
raise NotImplementedError("How this mask should be implemented is not yet defined.")
|
|
71
71
|
|
|
72
72
|
@classmethod
|
|
73
|
-
def cast_string_to_time(cls,
|
|
73
|
+
def cast_string_to_time(cls, value: Any, mask: str) -> Any:
|
|
74
74
|
"""
|
|
75
75
|
This method casts a string to a time, according to the mask.
|
|
76
76
|
|
|
@@ -78,20 +78,21 @@ class Cast(Operator.Unary):
|
|
|
78
78
|
|
|
79
79
|
raise NotImplementedError("How this cast should be implemented is not yet defined.")
|
|
80
80
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
81
|
+
#
|
|
82
|
+
# @classmethod
|
|
83
|
+
# def cast_date_to_string(cls, value: Any, mask: str) -> Any:
|
|
84
|
+
# """ """
|
|
85
|
+
# return NotImplementedError("How this cast should be implemented is not yet defined.")
|
|
86
|
+
#
|
|
87
|
+
# @classmethod
|
|
88
|
+
# def cast_duration_to_string(cls, value: Any, mask: str) -> Any:
|
|
89
|
+
# """ """
|
|
90
|
+
# return NotImplementedError("How this cast should be implemented is not yet defined.")
|
|
91
|
+
#
|
|
92
|
+
# @classmethod
|
|
93
|
+
# def cast_time_to_string(cls, value: Any, mask: str) -> Any:
|
|
94
|
+
# """ """
|
|
95
|
+
# return NotImplementedError("How this cast should be implemented is not yet defined.")
|
|
95
96
|
|
|
96
97
|
@classmethod
|
|
97
98
|
def cast_time_period_to_date(cls, value: Any, mask_value: str) -> Any:
|
|
@@ -142,7 +143,6 @@ class Cast(Operator.Unary):
|
|
|
142
143
|
|
|
143
144
|
@classmethod
|
|
144
145
|
def check_mask_value_from_time_period_to_date(cls, mask_value: str) -> None:
|
|
145
|
-
|
|
146
146
|
if mask_value not in ["START", "END"]:
|
|
147
147
|
raise SemanticError("1-1-5-4", op=cls.op, type_1="Time_Period", type_2="Date")
|
|
148
148
|
|
|
@@ -180,9 +180,11 @@ class Cast(Operator.Unary):
|
|
|
180
180
|
|
|
181
181
|
@classmethod
|
|
182
182
|
def check_cast(
|
|
183
|
-
cls,
|
|
183
|
+
cls,
|
|
184
|
+
from_type: Type[ScalarType],
|
|
185
|
+
to_type: Type[ScalarType],
|
|
186
|
+
mask_value: Optional[str],
|
|
184
187
|
) -> None:
|
|
185
|
-
|
|
186
188
|
if mask_value is not None:
|
|
187
189
|
cls.check_with_mask(from_type, to_type, mask_value)
|
|
188
190
|
else:
|
|
@@ -192,7 +194,6 @@ class Cast(Operator.Unary):
|
|
|
192
194
|
def check_with_mask(
|
|
193
195
|
cls, from_type: Type[ScalarType], to_type: Type[ScalarType], mask_value: str
|
|
194
196
|
) -> None:
|
|
195
|
-
|
|
196
197
|
explicit_promotion = EXPLICIT_WITH_MASK_TYPE_PROMOTION_MAPPING[from_type]
|
|
197
198
|
if to_type.is_included(explicit_promotion):
|
|
198
199
|
return cls.check_mask_value(from_type, to_type, mask_value)
|
|
@@ -207,7 +208,6 @@ class Cast(Operator.Unary):
|
|
|
207
208
|
|
|
208
209
|
@classmethod
|
|
209
210
|
def check_without_mask(cls, from_type: Type[ScalarType], to_type: Type[ScalarType]) -> None:
|
|
210
|
-
|
|
211
211
|
explicit_promotion = EXPLICIT_WITHOUT_MASK_TYPE_PROMOTION_MAPPING[from_type]
|
|
212
212
|
implicit_promotion = IMPLICIT_TYPE_PROMOTION_MAPPING[from_type]
|
|
213
213
|
if not (to_type.is_included(explicit_promotion) or to_type.is_included(implicit_promotion)):
|
|
@@ -231,7 +231,7 @@ class Cast(Operator.Unary):
|
|
|
231
231
|
cls, data: Any, from_type: Type[ScalarType], to_type: Type[ScalarType]
|
|
232
232
|
) -> Any:
|
|
233
233
|
"""
|
|
234
|
-
|
|
234
|
+
Cast the component to the type to_type without mask
|
|
235
235
|
"""
|
|
236
236
|
|
|
237
237
|
if to_type.is_included(IMPLICIT_TYPE_PROMOTION_MAPPING[from_type]):
|
|
@@ -242,15 +242,17 @@ class Cast(Operator.Unary):
|
|
|
242
242
|
|
|
243
243
|
@classmethod
|
|
244
244
|
def cast_mask_component(cls, data: Any, from_type: Any, to_type: Any, mask: str) -> Any:
|
|
245
|
-
|
|
246
245
|
result = data.map(lambda x: cls.cast_value(x, from_type, to_type, mask), na_action="ignore")
|
|
247
246
|
return result
|
|
248
247
|
|
|
249
248
|
@classmethod
|
|
250
249
|
def cast_value(
|
|
251
|
-
cls,
|
|
250
|
+
cls,
|
|
251
|
+
value: Any,
|
|
252
|
+
provided_type: Type[ScalarType],
|
|
253
|
+
to_type: Type[ScalarType],
|
|
254
|
+
mask_value: str,
|
|
252
255
|
) -> Any:
|
|
253
|
-
|
|
254
256
|
if provided_type == String and to_type == Number:
|
|
255
257
|
return cls.cast_string_to_number(value, mask_value)
|
|
256
258
|
if provided_type == String and to_type == Date:
|
|
@@ -261,12 +263,12 @@ class Cast(Operator.Unary):
|
|
|
261
263
|
return cls.cast_string_to_time_period(value, mask_value)
|
|
262
264
|
if provided_type == String and to_type == TimeInterval:
|
|
263
265
|
return cls.cast_string_to_time(value, mask_value)
|
|
264
|
-
if provided_type == Date and to_type == String:
|
|
265
|
-
|
|
266
|
-
if provided_type == Duration and to_type == String:
|
|
267
|
-
|
|
268
|
-
if provided_type == TimeInterval and to_type == String:
|
|
269
|
-
|
|
266
|
+
# if provided_type == Date and to_type == String:
|
|
267
|
+
# return cls.cast_date_to_string(value, mask_value)
|
|
268
|
+
# if provided_type == Duration and to_type == String:
|
|
269
|
+
# return cls.cast_duration_to_string(value, mask_value)
|
|
270
|
+
# if provided_type == TimeInterval and to_type == String:
|
|
271
|
+
# return cls.cast_time_to_string(value, mask_value)
|
|
270
272
|
if provided_type == TimePeriod and to_type == Date:
|
|
271
273
|
return cls.cast_time_period_to_date(value, mask_value)
|
|
272
274
|
|
|
@@ -285,7 +287,6 @@ class Cast(Operator.Unary):
|
|
|
285
287
|
scalarType: Type[ScalarType],
|
|
286
288
|
mask: Optional[str] = None,
|
|
287
289
|
) -> Any:
|
|
288
|
-
|
|
289
290
|
if mask is not None and not isinstance(mask, str):
|
|
290
291
|
raise Exception(f"{cls.op} mask must be a string")
|
|
291
292
|
|
|
@@ -325,7 +326,10 @@ class Cast(Operator.Unary):
|
|
|
325
326
|
else:
|
|
326
327
|
measure_name = measure.name
|
|
327
328
|
result_components[measure_name] = Component(
|
|
328
|
-
name=measure_name,
|
|
329
|
+
name=measure_name,
|
|
330
|
+
data_type=to_type,
|
|
331
|
+
role=Role.MEASURE,
|
|
332
|
+
nullable=measure.nullable,
|
|
329
333
|
)
|
|
330
334
|
return Dataset(name="result", components=result_components, data=None)
|
|
331
335
|
|
|
@@ -366,7 +370,6 @@ class Cast(Operator.Unary):
|
|
|
366
370
|
scalarType: Type[ScalarType],
|
|
367
371
|
mask: Optional[str] = None,
|
|
368
372
|
) -> Any:
|
|
369
|
-
|
|
370
373
|
if isinstance(operand, Dataset):
|
|
371
374
|
return cls.dataset_evaluation(operand, scalarType, mask)
|
|
372
375
|
if isinstance(operand, Scalar):
|
|
@@ -381,7 +384,6 @@ class Cast(Operator.Unary):
|
|
|
381
384
|
to_type: Type[ScalarType],
|
|
382
385
|
mask: Optional[str] = None,
|
|
383
386
|
) -> Dataset:
|
|
384
|
-
|
|
385
387
|
from_type = operand.get_measures()[0].data_type
|
|
386
388
|
original_measure = operand.get_measures()[0]
|
|
387
389
|
result_dataset = cls.dataset_validation(operand, to_type, mask)
|
|
@@ -410,7 +412,6 @@ class Cast(Operator.Unary):
|
|
|
410
412
|
to_type: Type[ScalarType],
|
|
411
413
|
mask: Optional[str] = None,
|
|
412
414
|
) -> Scalar:
|
|
413
|
-
|
|
414
415
|
from_type = operand.data_type
|
|
415
416
|
result_scalar = cls.scalar_validation(operand, to_type, mask)
|
|
416
417
|
if pd.isna(operand.value):
|
|
@@ -431,7 +432,6 @@ class Cast(Operator.Unary):
|
|
|
431
432
|
to_type: Type[ScalarType],
|
|
432
433
|
mask: Optional[str] = None,
|
|
433
434
|
) -> DataComponent:
|
|
434
|
-
|
|
435
435
|
from_type = operand.data_type
|
|
436
436
|
result_component = cls.component_validation(operand, to_type, mask)
|
|
437
437
|
if mask:
|
vtlengine/Operators/Clause.py
CHANGED
|
@@ -22,12 +22,10 @@ class Calc(Operator):
|
|
|
22
22
|
|
|
23
23
|
@classmethod
|
|
24
24
|
def validate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
|
|
25
|
-
|
|
26
25
|
result_components = {name: copy(comp) for name, comp in dataset.components.items()}
|
|
27
26
|
result_dataset = Dataset(name=dataset.name, components=result_components, data=None)
|
|
28
27
|
|
|
29
28
|
for operand in operands:
|
|
30
|
-
|
|
31
29
|
if operand.name in result_dataset.components:
|
|
32
30
|
if result_dataset.components[operand.name].role == Role.IDENTIFIER:
|
|
33
31
|
raise SemanticError("1-1-6-13", op=cls.op, comp_name=operand.name)
|
|
@@ -72,7 +70,6 @@ class Aggregate(Operator):
|
|
|
72
70
|
|
|
73
71
|
@classmethod
|
|
74
72
|
def validate(cls, operands: List[Union[DataComponent, Scalar]], dataset: Dataset) -> Dataset:
|
|
75
|
-
|
|
76
73
|
result_dataset = Dataset(name=dataset.name, components=dataset.components, data=None)
|
|
77
74
|
|
|
78
75
|
for operand in operands:
|
|
@@ -121,7 +118,6 @@ class Aggregate(Operator):
|
|
|
121
118
|
|
|
122
119
|
|
|
123
120
|
class Filter(Operator):
|
|
124
|
-
|
|
125
121
|
@classmethod
|
|
126
122
|
def validate(cls, condition: DataComponent, dataset: Dataset) -> Dataset:
|
|
127
123
|
if condition.data_type != Boolean:
|
|
@@ -212,11 +208,17 @@ class Rename(Operator):
|
|
|
212
208
|
for operand in operands:
|
|
213
209
|
if operand.old_name not in dataset.components:
|
|
214
210
|
raise SemanticError(
|
|
215
|
-
"1-1-1-10",
|
|
211
|
+
"1-1-1-10",
|
|
212
|
+
op=cls.op,
|
|
213
|
+
comp_name=operand.old_name,
|
|
214
|
+
dataset_name=dataset.name,
|
|
216
215
|
)
|
|
217
216
|
if operand.new_name in dataset.components:
|
|
218
217
|
raise SemanticError(
|
|
219
|
-
"1-1-6-8",
|
|
218
|
+
"1-1-6-8",
|
|
219
|
+
op=cls.op,
|
|
220
|
+
comp_name=operand.new_name,
|
|
221
|
+
dataset_name=dataset.name,
|
|
220
222
|
)
|
|
221
223
|
|
|
222
224
|
result_components = {comp.name: comp for comp in dataset.components.values()}
|
|
@@ -242,7 +244,6 @@ class Rename(Operator):
|
|
|
242
244
|
|
|
243
245
|
|
|
244
246
|
class Pivot(Operator):
|
|
245
|
-
|
|
246
247
|
@classmethod
|
|
247
248
|
def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
|
|
248
249
|
raise NotImplementedError
|
|
@@ -253,7 +254,6 @@ class Pivot(Operator):
|
|
|
253
254
|
|
|
254
255
|
|
|
255
256
|
class Unpivot(Operator):
|
|
256
|
-
|
|
257
257
|
@classmethod
|
|
258
258
|
def validate(cls, operands: List[str], dataset: Dataset) -> Dataset:
|
|
259
259
|
if len(operands) != 2:
|
|
@@ -311,11 +311,17 @@ class Sub(Operator):
|
|
|
311
311
|
for operand in operands:
|
|
312
312
|
if operand.name not in dataset.components:
|
|
313
313
|
raise SemanticError(
|
|
314
|
-
"1-1-1-10",
|
|
314
|
+
"1-1-1-10",
|
|
315
|
+
op=cls.op,
|
|
316
|
+
comp_name=operand.name,
|
|
317
|
+
dataset_name=dataset.name,
|
|
315
318
|
)
|
|
316
319
|
if operand.role != Role.IDENTIFIER:
|
|
317
320
|
raise SemanticError(
|
|
318
|
-
"1-1-6-10",
|
|
321
|
+
"1-1-6-10",
|
|
322
|
+
op=cls.op,
|
|
323
|
+
operand=operand.name,
|
|
324
|
+
dataset_name=dataset.name,
|
|
319
325
|
)
|
|
320
326
|
if isinstance(operand, Scalar):
|
|
321
327
|
raise SemanticError("1-1-6-5", op=cls.op, name=operand.name)
|
|
@@ -74,10 +74,11 @@ class Binary(Operator.Binary):
|
|
|
74
74
|
return_type = Boolean
|
|
75
75
|
|
|
76
76
|
@classmethod
|
|
77
|
-
def _cast_values(
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
77
|
+
def _cast_values(
|
|
78
|
+
cls,
|
|
79
|
+
x: Optional[Union[int, float, str, bool]],
|
|
80
|
+
y: Optional[Union[int, float, str, bool]],
|
|
81
|
+
) -> Any:
|
|
81
82
|
# Cast values to compatible types for comparison
|
|
82
83
|
try:
|
|
83
84
|
if isinstance(x, str) and isinstance(y, bool):
|
|
@@ -247,9 +248,7 @@ class Between(Operator.Operator):
|
|
|
247
248
|
z: Optional[Union[int, float, bool, str]],
|
|
248
249
|
) -> Optional[bool]:
|
|
249
250
|
return (
|
|
250
|
-
None
|
|
251
|
-
if (pd.isnull(x) or pd.isnull(y) or pd.isnull(z))
|
|
252
|
-
else y <= x <= z # type: ignore[operator]
|
|
251
|
+
None if (pd.isnull(x) or pd.isnull(y) or pd.isnull(z)) else y <= x <= z # type: ignore[operator]
|
|
253
252
|
)
|
|
254
253
|
|
|
255
254
|
@classmethod
|
|
@@ -264,7 +263,8 @@ class Between(Operator.Operator):
|
|
|
264
263
|
to_data = pd.Series(to_data, index=series.index)
|
|
265
264
|
df = pd.DataFrame({"operand": series, "from_data": from_data, "to_data": to_data})
|
|
266
265
|
return df.apply(
|
|
267
|
-
lambda x: cls.op_func(x["operand"], x["from_data"], x["to_data"]),
|
|
266
|
+
lambda x: cls.op_func(x["operand"], x["from_data"], x["to_data"]),
|
|
267
|
+
axis=1,
|
|
268
268
|
)
|
|
269
269
|
|
|
270
270
|
return series.map(lambda x: cls.op_func(x, from_data, to_data))
|
|
@@ -310,13 +310,19 @@ class Between(Operator.Operator):
|
|
|
310
310
|
result = Dataset(name=operand.name, components=result_components, data=None)
|
|
311
311
|
elif isinstance(operand, DataComponent):
|
|
312
312
|
result = DataComponent(
|
|
313
|
-
name=operand.name,
|
|
313
|
+
name=operand.name,
|
|
314
|
+
data=None,
|
|
315
|
+
data_type=cls.return_type,
|
|
316
|
+
role=operand.role,
|
|
314
317
|
)
|
|
315
318
|
elif isinstance(from_, Scalar) and isinstance(to, Scalar):
|
|
316
319
|
result = Scalar(name=operand.name, value=None, data_type=cls.return_type)
|
|
317
320
|
else: # From or To is a DataComponent, or both
|
|
318
321
|
result = DataComponent(
|
|
319
|
-
name=operand.name,
|
|
322
|
+
name=operand.name,
|
|
323
|
+
data=None,
|
|
324
|
+
data_type=cls.return_type,
|
|
325
|
+
role=Role.MEASURE,
|
|
320
326
|
)
|
|
321
327
|
|
|
322
328
|
if isinstance(operand, Dataset):
|
|
@@ -369,14 +375,16 @@ class Between(Operator.Operator):
|
|
|
369
375
|
elif isinstance(operand, Scalar) and (
|
|
370
376
|
isinstance(from_data, pd.Series) or isinstance(to_data, pd.Series)
|
|
371
377
|
): # From or To is a DataComponent, or both
|
|
372
|
-
|
|
373
378
|
if isinstance(from_data, pd.Series):
|
|
374
379
|
series = pd.Series(operand.value, index=from_data.index, dtype=object)
|
|
375
380
|
elif isinstance(to_data, pd.Series):
|
|
376
381
|
series = pd.Series(operand.value, index=to_data.index, dtype=object)
|
|
377
382
|
result_series = cls.apply_operation_component(series, from_data, to_data)
|
|
378
383
|
result = DataComponent(
|
|
379
|
-
name=operand.name,
|
|
384
|
+
name=operand.name,
|
|
385
|
+
data=result_series,
|
|
386
|
+
data_type=cls.return_type,
|
|
387
|
+
role=Role.MEASURE,
|
|
380
388
|
)
|
|
381
389
|
return result
|
|
382
390
|
|