vtlengine-1.2.1rc1-py3-none-any.whl → vtlengine-1.3.0rc1-py3-none-any.whl
This diff shows the content of publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the changes between the two published versions.
Potentially problematic release.
This version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +35 -12
- vtlengine/API/__init__.py +52 -13
- vtlengine/API/data/schema/external_routines_schema.json +34 -0
- vtlengine/API/data/schema/value_domain_schema.json +97 -0
- vtlengine/AST/ASTConstructorModules/Terminals.py +1 -5
- vtlengine/AST/Grammar/lexer.py +1112 -19758
- vtlengine/AST/Grammar/parser.py +3224 -17981
- vtlengine/AST/__init__.py +3 -3
- vtlengine/DataTypes/TimeHandling.py +12 -7
- vtlengine/DataTypes/__init__.py +92 -0
- vtlengine/{files/parser → DataTypes}/_time_checking.py +8 -3
- vtlengine/Exceptions/messages.py +13 -0
- vtlengine/Interpreter/__init__.py +60 -16
- vtlengine/Model/__init__.py +47 -3
- vtlengine/Operators/Aggregation.py +10 -2
- vtlengine/Operators/Conditional.py +52 -34
- vtlengine/Operators/General.py +1 -1
- vtlengine/Operators/Validation.py +33 -5
- vtlengine/Operators/__init__.py +10 -4
- vtlengine/__init__.py +1 -1
- vtlengine/files/parser/__init__.py +17 -7
- {vtlengine-1.2.1rc1.dist-info → vtlengine-1.3.0rc1.dist-info}/METADATA +7 -6
- {vtlengine-1.2.1rc1.dist-info → vtlengine-1.3.0rc1.dist-info}/RECORD +25 -23
- {vtlengine-1.2.1rc1.dist-info → vtlengine-1.3.0rc1.dist-info}/WHEEL +1 -1
- {vtlengine-1.2.1rc1.dist-info → vtlengine-1.3.0rc1.dist-info/licenses}/LICENSE.md +0 -0
vtlengine/AST/__init__.py
CHANGED
@@ -443,7 +443,7 @@ class Validation(AST):
     op: str
     validation: str
     error_code: Optional[str]
-    error_level: Optional[int]
+    error_level: Optional[Union[int, str]]
     imbalance: Optional[AST]
     invalid: bool
 
@@ -590,7 +590,7 @@ class HRule(AST):
     name: Optional[str]
     rule: HRBinOp
     erCode: Optional[str]
-    erLevel: Optional[int]
+    erLevel: Optional[Union[int, str]]
 
     __eq__ = AST.ast_equality
 
@@ -604,7 +604,7 @@ class DPRule(AST):
     name: Optional[str]
    rule: HRBinOp
     erCode: Optional[str]
-    erLevel: Optional[int]
+    erLevel: Optional[Union[int, str]]
 
     __eq__ = AST.ast_equality
 
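The net effect of these three hunks: rule error levels are no longer forced to be numeric. A minimal typing sketch of what the widened annotation now admits (illustration only, not vtlengine code):

    from typing import List, Optional, Union

    ErrorLevel = Optional[Union[int, str]]  # the new shape of error_level / erLevel

    levels: List[ErrorLevel] = [5, "WARNING", None]  # all three now type-check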
vtlengine/DataTypes/TimeHandling.py
CHANGED

@@ -7,6 +7,7 @@ from typing import Any, Dict, Optional, Union
 
 import pandas as pd
 
+from vtlengine.AST.Grammar.tokens import GT, GTE, LT, LTE
 from vtlengine.Exceptions import SemanticError
 
 PERIOD_IND_MAPPING = {"A": 6, "S": 5, "Q": 4, "M": 3, "W": 2, "D": 1}
@@ -180,7 +181,7 @@ class TimePeriodHandler:
 
     @staticmethod
     def _check_year(year: int) -> None:
-        if year < 1900 or year > 9999:
+        if year < 0 or year > 9999:
             raise SemanticError("2-1-19-10", year=year)
         # raise ValueError(f'Invalid year {year}, must be between 1900 and 9999.')
 
@@ -254,6 +255,10 @@ class TimePeriodHandler:
         if isinstance(other, str):
             other = TimePeriodHandler(other)
 
+        if self.period_indicator != other.period_indicator:
+            tokens = {operator.lt: "<", operator.le: "<=", operator.gt: ">", operator.ge: ">="}
+            raise SemanticError("2-1-19-19", op=tokens[py_op], value1=self, value2=other)
+
         self_lapse, other_lapse = self.period_dates, other.period_dates
         is_lt_or_le = py_op in [operator.lt, operator.le]
         is_gt_or_ge = py_op in [operator.gt, operator.ge]
@@ -407,22 +412,22 @@ class TimeIntervalHandler:
         return py_op(self.length, other.length)
 
     def __eq__(self, other: Any) -> Optional[bool]:  # type: ignore[override]
-        return self
+        return str(self) == str(other) if other is not None else None
 
     def __ne__(self, other: Any) -> Optional[bool]:  # type: ignore[override]
-        return self
+        return str(self) != str(other) if other is not None else None
 
     def __lt__(self, other: Any) -> Optional[bool]:
-
+        raise SemanticError("2-1-19-17", op=LT, type="Time")
 
     def __le__(self, other: Any) -> Optional[bool]:
-
+        raise SemanticError("2-1-19-17", op=LTE, type="Time")
 
     def __gt__(self, other: Any) -> Optional[bool]:
-
+        raise SemanticError("2-1-19-17", op=GT, type="Time")
 
     def __ge__(self, other: Any) -> Optional[bool]:
-
+        raise SemanticError("2-1-19-17", op=GTE, type="Time")
 
     @classmethod
     def from_time_period(cls, value: TimePeriodHandler) -> "TimeIntervalHandler":
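Taken together, these hunks tighten time comparisons: time (interval) values now only support = and <> (error 2-1-19-17), and time_period ordering requires matching period indicators (error 2-1-19-19). A short sketch, assuming TimePeriodHandler's rich comparisons delegate to the checked helper above and accept the usual "YYYY-Mnn" / "YYYY-Qn" literals:

    from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
    from vtlengine.Exceptions import SemanticError

    jan, jun = TimePeriodHandler("2022-M01"), TimePeriodHandler("2022-M06")
    print(jan < jun)  # same indicator ("M"): ordering still works

    try:
        jan < TimePeriodHandler("2022-Q1")  # "M" vs "Q": now raises 2-1-19-19
    except SemanticError as e:
        print(e)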
vtlengine/DataTypes/__init__.py
CHANGED
@@ -3,6 +3,11 @@ from typing import Any, Dict, Optional, Set, Type, Union
 
 import pandas as pd
 
+from vtlengine.DataTypes._time_checking import (
+    check_date,
+    check_time,
+    check_time_period,
+)
 from vtlengine.DataTypes.TimeHandling import (
     check_max_date,
     date_to_period_str,
@@ -102,6 +107,14 @@ class ScalarType:
         class_name: str = cls.__name__.__str__()
         return DTYPE_MAPPING[class_name]
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        try:
+            cls.cast(value)
+            return True
+        except Exception:
+            return False
+
 
 class String(ScalarType):
     """ """
@@ -143,6 +156,10 @@ class String(ScalarType):
             type_2=SCALAR_TYPES_CLASS_REVERSE[cls],
         )
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        return True
+
 
 class Number(ScalarType):
     """ """
@@ -201,6 +218,19 @@ class Number(ScalarType):
             return 0.0
         return float(value)
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        if pd.isnull(value):
+            return True
+        if isinstance(value, (int, float, bool)):
+            return True
+        if isinstance(value, str):
+            v = value.strip()
+            if v.lower() in {"true", "false"}:
+                return True
+            return bool(re.match(r"^\d+(\.\d*)?$|^\.\d+$", v))
+        return False
+
 
 class Integer(Number):
     """ """
@@ -286,6 +316,16 @@ class Integer(Number):
             return 0
         return int(value)
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        if pd.isnull(value):
+            return True
+        if isinstance(value, str):
+            return value.isdigit() or value.lower() in {"true", "false"}
+        if isinstance(value, float):
+            return value.is_integer()
+        return isinstance(value, (int, bool))
+
 
 class TimeInterval(ScalarType):
     """ """
@@ -325,6 +365,16 @@ class TimeInterval(ScalarType):
             type_2=SCALAR_TYPES_CLASS_REVERSE[cls],
         )
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        if pd.isnull(value):
+            return True
+        try:
+            check_time(value)
+        except Exception:
+            return False
+        return True
+
 
 class Date(TimeInterval):
     """ """
@@ -357,6 +407,16 @@ class Date(TimeInterval):
             type_2=SCALAR_TYPES_CLASS_REVERSE[cls],
         )
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        if pd.isnull(value):
+            return True
+        try:
+            check_date(value)
+        except Exception:
+            return False
+        return True
+
 
 class TimePeriod(TimeInterval):
     """ """
@@ -400,6 +460,16 @@ class TimePeriod(TimeInterval):
             type_2=SCALAR_TYPES_CLASS_REVERSE[cls],
         )
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        if pd.isnull(value):
+            return True
+        try:
+            check_time_period(value)
+        except Exception:
+            return False
+        return True
+
 
 class Duration(ScalarType):
     iso8601_duration_pattern = r"^P((\d+Y)?(\d+M)?(\d+D)?)$"
@@ -461,6 +531,16 @@ class Duration(ScalarType):
         total_days = years * 365 + months * 30 + days
         return int(total_days)
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        if pd.isnull(value):
+            return True
+
+        if isinstance(value, str):
+            match = re.match(cls.iso8601_duration_pattern, value)
+            return bool(match)
+        return False
+
 
 class Boolean(ScalarType):
     """ """
@@ -514,6 +594,14 @@ class Boolean(ScalarType):
             type_2=SCALAR_TYPES_CLASS_REVERSE[cls],
         )
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        if pd.isnull(value):
+            return True
+        if isinstance(value, str):
+            return value.lower() in {"true", "false", "1", "0"}
+        return isinstance(value, (int, float, bool))
+
 
 class Null(ScalarType):
     """ """
@@ -534,6 +622,10 @@ 
     def dtype(cls) -> str:
         return "string"
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        return True
+
 
 SCALAR_TYPES: Dict[str, Type[ScalarType]] = {
     "String": String,
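Every scalar type now exposes a check classmethod that reports whether a value is valid for the type without raising; nulls are accepted by all of them, and the ScalarType fallback simply tries cast. A short sketch of the resulting behavior, derived from the predicates above:

    import pandas as pd

    from vtlengine.DataTypes import Boolean, Date, Duration, Integer, Number

    assert Integer.check("42") and not Integer.check("4.2")
    assert Number.check("4.2") and Number.check("true")      # bool literals pass as numbers
    assert Boolean.check("1") and not Boolean.check("yes")
    assert Duration.check("P1Y2M3D") and not Duration.check("PT1H")  # date parts only
    assert Date.check(None) and Date.check(pd.NA)            # nulls always pass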
vtlengine/{files/parser → DataTypes}/_time_checking.py
CHANGED

@@ -11,7 +11,7 @@ def check_date(value: str) -> str:
     Check if the date is in the correct format.
     """
     # Remove all whitespaces
-    value = value.replace(" ", "")
+    value = value.strip()
     try:
         if len(value) == 9 and value[7] == "-":
             value = value[:-1] + "0" + value[-1]
@@ -49,7 +49,7 @@ time_pattern = r"^" + date_pattern + r"/" + date_pattern + r"$"
 
 
 def check_time(value: str) -> str:
-    value = value.replace(" ", "")
+    value = value.strip()
     year_result = re.fullmatch(year_pattern, value)
     if year_result is not None:
         date1_time = datetime.strptime(value, "%Y")
@@ -94,7 +94,12 @@ further_options_period_pattern = (
 def check_time_period(value: str) -> str:
     if isinstance(value, int):
         value = str(value)
-    value = value.replace(" ", "")
+    value = value.strip()
+
+    match = re.fullmatch(r"^(\d{4})-(\d{2})$", value)
+    if match:
+        value = f"{match.group(1)}-M{match.group(2)}"
+
     period_result = re.fullmatch(period_pattern, value)
     if period_result is not None:
         result = TimePeriodHandler(value)
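Besides switching from full whitespace removal to strip(), check_time_period now normalises plain "YYYY-MM" input to the explicit monthly form before matching. A minimal sketch (return values omitted, since the exact canonical string comes from TimePeriodHandler):

    from vtlengine.DataTypes._time_checking import check_time_period

    check_time_period("2022-03")     # rewritten to "2022-M03" before pattern matching
    check_time_period(" 2022-M03 ")  # leading/trailing whitespace is stripped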
vtlengine/Exceptions/messages.py
CHANGED
@@ -17,6 +17,8 @@ centralised_messages = {
     "0-1-2-3": "Component {component} is duplicated.",
     "0-1-2-4": "Invalid json structure because {err} on file {filename}.",
     "0-1-2-5": "File {file} must be encoded in utf-8 (without BOM).",
+    "0-1-2-6": "Not found scalar {name} in datastructures",
+    "0-1-2-7": "Invalid value '{value}' for type {type_} {op_type} {name}.",
     # Run SDMX errors
     "0-1-3-1": "Expected exactly one input dataset in the whole script, found: {number_datasets}",
     "0-1-3-2": "SDMX Dataset {schema} requires to have a Schema object defined as structure",
@@ -44,6 +46,8 @@ centralised_messages = {
     "0-1-1-12": "On Dataset {name} loading: not possible to cast column {column} to {type}.",
     "0-1-1-13": "Invalid key on {field} field: {key}{closest_key}.",
     "0-1-1-14": "Empty datasets {dataset1} and {dataset2} shape missmatch.",
+    "0-1-1-15": "On Dataset {name} loading: Duplicated identifiers are not allowed, "
+    "found on row {row_index}",
     "0-1-0-1": " Trying to redefine input datasets {dataset}.",  # Semantic Error
     # ------------Operators-------------
     # General Semantic errors
@@ -51,6 +55,7 @@ centralised_messages = {
     "1-1-1-2": "Invalid implicit cast from {type_1} and {type_2} to {type_check}.",
     "1-1-1-3": "At op {op}: {entity} {name} cannot be promoted to {target_type}.",
     "1-1-1-4": "At op {op}: Operation not allowed for multimeasure datasets.",
+    "1-1-1-5": "At op {op}: Invalid type {type}.",
     "1-1-1-8": "At op {op}: Invalid Dataset {name}, no measures defined.",
     "1-1-1-9": "At op {op}: Invalid Dataset {name}, all measures must have the same type: {type}.",
     "1-1-1-10": "Component {comp_name} not found in Dataset {dataset_name}.",
@@ -240,6 +245,14 @@ centralised_messages = {
     "measure.",
     "2-1-19-15": "{op} can only be applied according to the iso 8601 format mask",
     "2-1-19-16": "{op} can only be positive numbers",
+    "2-1-19-17": "At op {op}: Time operators comparison are only support "
+    "= and <> comparison operations",
+    "2-1-19-18": "At op {op}: Time operators do not support < and > comparison operations, "
+    "so its not possible to use get the max or min between two time operators",
+    "2-1-19-19": "Time Period comparison (>, <, >=, <=) with different period indicator is not "
+    "supported, found {value1} {op} {value2}",
+    "2-1-19-20": "Time Period operands with different period indicators do not support < and > "
+    "comparison operations, unable to get the {op}",
     # ----------- Interpreter Common ------
     "2-3-1": "{comp_type} {comp_name} not found.",
     "2-3-2": "{op_type} cannot be used with {node_op} operators.",
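The new entries follow the file's existing convention: adjacent string literals concatenate, and placeholders are filled when the error is raised. A sketch of the lookup (the explicit format call is an assumption about how SemanticError renders these):

    from vtlengine.Exceptions.messages import centralised_messages

    msg = centralised_messages["2-1-19-19"].format(op="<", value1="2022-M01", value2="2022-Q1")
    # -> "Time Period comparison (>, <, >=, <=) with different period indicator
    #     is not supported, found 2022-M01 < 2022-Q1"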
vtlengine/Interpreter/__init__.py
CHANGED

@@ -151,6 +151,8 @@ class InterpreterAnalyzer(ASTTemplate):
     dprs: Optional[Dict[str, Optional[Dict[str, Any]]]] = None
     udos: Optional[Dict[str, Optional[Dict[str, Any]]]] = None
     hrs: Optional[Dict[str, Optional[Dict[str, Any]]]] = None
+    is_from_case_then: bool = False
+    signature_values: Optional[Dict[str, Any]] = None
 
     # **********************************
     # *                                *
@@ -1078,15 +1080,43 @@ class InterpreterAnalyzer(ASTTemplate):
 
         if self.condition_stack is None:
             self.condition_stack = []
+        if self.then_condition_dataset is None:
+            self.then_condition_dataset = []
+        if self.else_condition_dataset is None:
+            self.else_condition_dataset = []
 
-
-        case = node.cases.pop(0)
+        for case in node.cases:
             self.is_from_condition = True
-
+            cond = self.visit(case.condition)
             self.is_from_condition = False
-            thenOps.append(self.visit(case.thenOp))
 
-
+            conditions.append(cond)
+            if isinstance(cond, Scalar):
+                then_result = self.visit(case.thenOp)
+                thenOps.append(then_result)
+                continue
+
+            self.generate_then_else_datasets(copy(cond))
+
+            self.condition_stack.append(THEN_ELSE["then"])
+            self.is_from_if = True
+            self.is_from_case_then = True
+
+            then_result = self.visit(case.thenOp)
+            thenOps.append(then_result)
+
+            self.is_from_case_then = False
+            self.is_from_if = False
+            if len(self.condition_stack) > 0:
+                self.condition_stack.pop()
+            if len(self.then_condition_dataset) > 0:
+                self.then_condition_dataset.pop()
+            if len(self.else_condition_dataset) > 0:
+                self.else_condition_dataset.pop()
+
+        elseOp = self.visit(node.elseOp)
+
+        return Case.analyze(conditions, thenOps, elseOp)
 
     def visit_RenameNode(self, node: AST.RenameNode) -> Any:
         if self.udo_params is not None:
@@ -1575,11 +1605,10 @@ class InterpreterAnalyzer(ASTTemplate):
         if self.else_condition_dataset is None:
             self.else_condition_dataset = []
         if isinstance(condition, Dataset):
-            if (
-
-
-
-                raise ValueError("Only one boolean measure is allowed on condition dataset")
+            if len(condition.get_measures()) != 1:
+                raise SemanticError("1-1-1-4", op="condition")
+            if condition.get_measures()[0].data_type != BASIC_TYPES[bool]:
+                raise SemanticError("2-1-9-5", op="condition", name=condition.name)
             name = condition.get_measures_names()[0]
             if condition.data is None or condition.data.empty:
                 data = None
@@ -1589,7 +1618,7 @@ class InterpreterAnalyzer(ASTTemplate):
 
         else:
             if condition.data_type != BASIC_TYPES[bool]:
-                raise
+                raise SemanticError("2-1-9-4", op="condition", name=condition.name)
             name = condition.name
             data = None if condition.data is None else condition.data
 
@@ -1667,11 +1696,18 @@
         ):
             return left_operand, right_operand
 
-
-
-
-
-
+        if self.is_from_case_then:
+            merge_dataset = (
+                self.then_condition_dataset[-1]
+                if self.condition_stack[-1] == THEN_ELSE["then"]
+                else self.else_condition_dataset[-1]
+            )
+        else:
+            merge_dataset = (
+                self.then_condition_dataset.pop()
+                if self.condition_stack.pop() == THEN_ELSE["then"]
+                else (self.else_condition_dataset.pop())
+            )
 
         merge_index = merge_dataset.data[merge_dataset.get_measures_names()[0]].to_list()
         ids = merge_dataset.get_identifiers_names()
@@ -1826,6 +1862,8 @@
             raise SemanticError("2-3-10", comp_type="User Defined Operators")
         elif node.op not in self.udos:
             raise SemanticError("1-3-5", node_op=node.op, op_type="User Defined Operator")
+        if self.signature_values is None:
+            self.signature_values = {}
 
         operator = self.udos[node.op]
         signature_values = {}
@@ -1919,6 +1957,12 @@
             self.udo_params = []
 
         # Adding parameters to the stack
+        for k, v in signature_values.items():
+            if hasattr(v, "name"):
+                v = v.name  # type: ignore[assignment]
+            if v in self.signature_values:
+                signature_values[k] = self.signature_values[v]  # type: ignore[index]
+        self.signature_values.update(signature_values)
         self.udo_params.append(signature_values)
 
         # Calling the UDO AST, we use deepcopy to avoid changing the original UDO AST
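The rewritten visit_Case iterates over every when/then branch (the old code popped only the first case), pushing a then/else condition dataset per branch and unwinding it afterwards. A sketch of a multi-branch case expression this enables, assuming the package's top-level run helper (data structures and datapoints omitted):

    from vtlengine import run  # public API name assumed

    script = '''
    DS_r := DS_1[calc Me_2 :=
        case when Me_1 < 0 then "negative"
             when Me_1 = 0 then "zero"
             else "positive"];
    '''
    # run(script=script, data_structures=..., datapoints=...)  # evaluates each branch in turn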
vtlengine/Model/__init__.py
CHANGED
@@ -14,7 +14,7 @@ from pandas._testing import assert_frame_equal
 import vtlengine.DataTypes as DataTypes
 from vtlengine.DataTypes import SCALAR_TYPES, ScalarType
 from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
-from vtlengine.Exceptions import SemanticError
+from vtlengine.Exceptions import InputValidationException, SemanticError
 
 # from pyspark.pandas import DataFrame as SparkDataFrame, Series as SparkSeries
 
@@ -27,7 +27,28 @@ class Scalar:
 
     name: str
     data_type: Type[ScalarType]
-
+    _value: Any
+
+    def __init__(self, name: str, data_type: Type[ScalarType], value: Any) -> None:
+        self.name = name
+        self.data_type = data_type
+        self.value = value
+
+    @property
+    def value(self) -> Any:
+        return self._value
+
+    @value.setter
+    def value(self, new_value: Any) -> None:
+        if self.data_type and not self.data_type.check(new_value):
+            raise InputValidationException(
+                code="0-1-2-7",
+                value=new_value,
+                type_=self.data_type.__name__,
+                op_type=self.__class__.__name__,
+                name=self.name,
+            )
+        self._value = new_value
 
     @classmethod
     def from_json(cls, json_str: str) -> "Scalar":
@@ -368,7 +389,30 @@ class ScalarSet:
     """
 
     data_type: Type[ScalarType]
-
+    _values: List[Union[int, float, str, bool]]
+
+    def __init__(
+        self, data_type: Type[ScalarType], values: List[Union[int, float, str, bool]]
+    ) -> None:
+        self.data_type = data_type
+        self.values = values
+
+    @property
+    def values(self) -> List[Union[int, float, str, bool]]:
+        return self._values
+
+    @values.setter
+    def values(self, new_values: List[Union[int, float, str, bool]]) -> None:
+        for value in new_values:
+            if self.data_type and not self.data_type.check(value):
+                raise InputValidationException(
+                    code="0-1-2-7",
+                    value=value,
+                    type_=self.data_type.__name__,
+                    op_type=self.__class__.__name__,
+                    name="",
+                )
+        self._values = new_values
 
     def __contains__(self, item: str) -> Optional[bool]:
         if isinstance(item, float) and item.is_integer():
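Scalar and ScalarSet now funnel every assignment through the type's check predicate, so invalid values are rejected at construction and on later mutation alike. A short sketch using the constructor signatures visible in the hunks above:

    from vtlengine.DataTypes import Integer
    from vtlengine.Exceptions import InputValidationException
    from vtlengine.Model import Scalar

    s = Scalar(name="sc_1", data_type=Integer, value="42")  # passes Integer.check
    try:
        s.value = "not a number"  # setter re-validates
    except InputValidationException as e:
        print(e)  # rendered from message 0-1-2-7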
vtlengine/Operators/Aggregation.py
CHANGED

@@ -72,7 +72,7 @@ class Aggregation(Operator.Unary):
                 new_value = ["9999-99-99"]
             else:
                 to_replace = ["9999-99-99"]
-            data[measure.name] = data[measure.name].replace(to_replace, new_value)
+            data[measure.name] = data[measure.name].replace(to_replace, new_value)  # type: ignore[arg-type, unused-ignore]
         elif measure.data_type == TimePeriod:
             if mode == "input":
                 data[measure.name] = (
@@ -80,6 +80,10 @@
                     .astype(object)
                     .map(lambda x: TimePeriodHandler(str(x)), na_action="ignore")
                 )
+                if cls.op in [MAX, MIN]:
+                    indicators = {v.period_indicator for v in data[measure.name].dropna()}
+                    if len(indicators) > 1:
+                        raise SemanticError("2-1-19-20", op=cls.op)
             else:
                 data[measure.name] = data[measure.name].map(
                     lambda x: str(x), na_action="ignore"
@@ -99,7 +103,7 @@
                     lambda x: str(x), na_action="ignore"
                 )
         elif measure.data_type == String:
-            data[measure.name] = data[measure.name].replace(to_replace, new_value)
+            data[measure.name] = data[measure.name].replace(to_replace, new_value)  # type: ignore[arg-type, unused-ignore]
         elif measure.data_type == Duration:
             if mode == "input":
                 data[measure.name] = data[measure.name].map(
@@ -259,6 +263,10 @@
         result_df = result_df[grouping_keys + measure_names]
         if cls.op == COUNT:
             result_df = result_df.dropna(subset=measure_names, how="any")
+        if cls.op in [MAX, MIN]:
+            for measure in operand.get_measures():
+                if measure.data_type == TimeInterval:
+                    raise SemanticError("2-1-19-18", op=cls.op)
         cls._handle_data_types(result_df, operand.get_measures(), "input")
         result_df = cls._agg_func(result_df, grouping_keys, measure_names, having_expr)
 
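These guards make min/max fail fast on time types that have no total order after the TimeHandling changes: any time_interval measure (error 2-1-19-18), and time_period measures whose values mix period indicators (error 2-1-19-20). A sketch mirroring (not importing) the indicator guard:

    from vtlengine.DataTypes.TimeHandling import TimePeriodHandler

    values = [TimePeriodHandler("2022-M01"), TimePeriodHandler("2022-Q1")]
    indicators = {v.period_indicator for v in values}
    if len(indicators) > 1:  # {"M", "Q"} here
        print("max/min undefined across period indicators:", indicators)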