vtlengine-1.2.1rc1-py3-none-any.whl → vtlengine-1.3.0rc1-py3-none-any.whl
This diff shows the content of publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the changes between the two published versions.
Potentially problematic release.
This version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +35 -12
- vtlengine/API/__init__.py +52 -13
- vtlengine/API/data/schema/external_routines_schema.json +34 -0
- vtlengine/API/data/schema/value_domain_schema.json +97 -0
- vtlengine/AST/ASTConstructorModules/Terminals.py +1 -5
- vtlengine/AST/Grammar/lexer.py +1112 -19758
- vtlengine/AST/Grammar/parser.py +3224 -17981
- vtlengine/AST/__init__.py +3 -3
- vtlengine/DataTypes/TimeHandling.py +12 -7
- vtlengine/DataTypes/__init__.py +92 -0
- vtlengine/{files/parser → DataTypes}/_time_checking.py +8 -3
- vtlengine/Exceptions/messages.py +13 -0
- vtlengine/Interpreter/__init__.py +60 -16
- vtlengine/Model/__init__.py +47 -3
- vtlengine/Operators/Aggregation.py +10 -2
- vtlengine/Operators/Conditional.py +52 -34
- vtlengine/Operators/General.py +1 -1
- vtlengine/Operators/Validation.py +33 -5
- vtlengine/Operators/__init__.py +10 -4
- vtlengine/__init__.py +1 -1
- vtlengine/files/parser/__init__.py +17 -7
- {vtlengine-1.2.1rc1.dist-info → vtlengine-1.3.0rc1.dist-info}/METADATA +7 -6
- {vtlengine-1.2.1rc1.dist-info → vtlengine-1.3.0rc1.dist-info}/RECORD +25 -23
- {vtlengine-1.2.1rc1.dist-info → vtlengine-1.3.0rc1.dist-info}/WHEEL +1 -1
- {vtlengine-1.2.1rc1.dist-info → vtlengine-1.3.0rc1.dist-info/licenses}/LICENSE.md +0 -0
vtlengine/AST/__init__.py
CHANGED
@@ -443,7 +443,7 @@ class Validation(AST):
     op: str
     validation: str
     error_code: Optional[str]
-    error_level: Optional[int]
+    error_level: Optional[Union[int, str]]
     imbalance: Optional[AST]
     invalid: bool
 
@@ -590,7 +590,7 @@ class HRule(AST):
     name: Optional[str]
     rule: HRBinOp
     erCode: Optional[str]
-    erLevel: Optional[int]
+    erLevel: Optional[Union[int, str]]
 
     __eq__ = AST.ast_equality
 
@@ -604,7 +604,7 @@ class DPRule(AST):
     name: Optional[str]
    rule: HRBinOp
     erCode: Optional[str]
-    erLevel: Optional[int]
+    erLevel: Optional[Union[int, str]]
 
     __eq__ = AST.ast_equality
 
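The net effect of these three hunks: rule error levels are no longer forced to be numeric. A minimal typing sketch of what the widened annotation now admits (illustration only, not vtlengine code):

    from typing import List, Optional, Union

    ErrorLevel = Optional[Union[int, str]]  # the new shape of error_level / erLevel

    levels: List[ErrorLevel] = [5, "WARNING", None]  # all three now type-check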
vtlengine/DataTypes/TimeHandling.py
CHANGED

@@ -7,6 +7,7 @@ from typing import Any, Dict, Optional, Union
 
 import pandas as pd
 
+from vtlengine.AST.Grammar.tokens import GT, GTE, LT, LTE
 from vtlengine.Exceptions import SemanticError
 
 PERIOD_IND_MAPPING = {"A": 6, "S": 5, "Q": 4, "M": 3, "W": 2, "D": 1}
@@ -180,7 +181,7 @@ class TimePeriodHandler:
 
     @staticmethod
     def _check_year(year: int) -> None:
-        if year < 1900 or year > 9999:
+        if year < 0 or year > 9999:
             raise SemanticError("2-1-19-10", year=year)
         # raise ValueError(f'Invalid year {year}, must be between 1900 and 9999.')
 
@@ -254,6 +255,10 @@ class TimePeriodHandler:
         if isinstance(other, str):
             other = TimePeriodHandler(other)
 
+        if self.period_indicator != other.period_indicator:
+            tokens = {operator.lt: "<", operator.le: "<=", operator.gt: ">", operator.ge: ">="}
+            raise SemanticError("2-1-19-19", op=tokens[py_op], value1=self, value2=other)
+
         self_lapse, other_lapse = self.period_dates, other.period_dates
         is_lt_or_le = py_op in [operator.lt, operator.le]
         is_gt_or_ge = py_op in [operator.gt, operator.ge]
@@ -407,22 +412,22 @@ class TimeIntervalHandler:
         return py_op(self.length, other.length)
 
     def __eq__(self, other: Any) -> Optional[bool]:  # type: ignore[override]
-        return self
+        return str(self) == str(other) if other is not None else None
 
     def __ne__(self, other: Any) -> Optional[bool]:  # type: ignore[override]
-        return self
+        return str(self) != str(other) if other is not None else None
 
     def __lt__(self, other: Any) -> Optional[bool]:
-
+        raise SemanticError("2-1-19-17", op=LT, type="Time")
 
     def __le__(self, other: Any) -> Optional[bool]:
-
+        raise SemanticError("2-1-19-17", op=LTE, type="Time")
 
     def __gt__(self, other: Any) -> Optional[bool]:
-
+        raise SemanticError("2-1-19-17", op=GT, type="Time")
 
     def __ge__(self, other: Any) -> Optional[bool]:
-
+        raise SemanticError("2-1-19-17", op=GTE, type="Time")
 
     @classmethod
     def from_time_period(cls, value: TimePeriodHandler) -> "TimeIntervalHandler":
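Taken together, these hunks tighten time comparisons: time (interval) values now only support = and <> (error 2-1-19-17), and time_period ordering requires matching period indicators (error 2-1-19-19). A short sketch, assuming TimePeriodHandler's rich comparisons delegate to the checked helper above and accept the usual "YYYY-Mnn" / "YYYY-Qn" literals:

    from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
    from vtlengine.Exceptions import SemanticError

    jan, jun = TimePeriodHandler("2022-M01"), TimePeriodHandler("2022-M06")
    print(jan < jun)  # same indicator ("M"): ordering still works

    try:
        jan < TimePeriodHandler("2022-Q1")  # "M" vs "Q": now raises 2-1-19-19
    except SemanticError as e:
        print(e)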
vtlengine/DataTypes/__init__.py
CHANGED
@@ -3,6 +3,11 @@ from typing import Any, Dict, Optional, Set, Type, Union
 
 import pandas as pd
 
+from vtlengine.DataTypes._time_checking import (
+    check_date,
+    check_time,
+    check_time_period,
+)
 from vtlengine.DataTypes.TimeHandling import (
     check_max_date,
     date_to_period_str,
@@ -102,6 +107,14 @@ class ScalarType:
         class_name: str = cls.__name__.__str__()
         return DTYPE_MAPPING[class_name]
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        try:
+            cls.cast(value)
+            return True
+        except Exception:
+            return False
+
 
 class String(ScalarType):
     """ """
@@ -143,6 +156,10 @@ class String(ScalarType):
             type_2=SCALAR_TYPES_CLASS_REVERSE[cls],
         )
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        return True
+
 
 class Number(ScalarType):
     """ """
@@ -201,6 +218,19 @@ class Number(ScalarType):
             return 0.0
         return float(value)
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        if pd.isnull(value):
+            return True
+        if isinstance(value, (int, float, bool)):
+            return True
+        if isinstance(value, str):
+            v = value.strip()
+            if v.lower() in {"true", "false"}:
+                return True
+            return bool(re.match(r"^\d+(\.\d*)?$|^\.\d+$", v))
+        return False
+
 
 class Integer(Number):
     """ """
@@ -286,6 +316,16 @@ class Integer(Number):
             return 0
         return int(value)
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        if pd.isnull(value):
+            return True
+        if isinstance(value, str):
+            return value.isdigit() or value.lower() in {"true", "false"}
+        if isinstance(value, float):
+            return value.is_integer()
+        return isinstance(value, (int, bool))
+
 
 class TimeInterval(ScalarType):
     """ """
@@ -325,6 +365,16 @@ class TimeInterval(ScalarType):
             type_2=SCALAR_TYPES_CLASS_REVERSE[cls],
         )
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        if pd.isnull(value):
+            return True
+        try:
+            check_time(value)
+        except Exception:
+            return False
+        return True
+
 
 class Date(TimeInterval):
     """ """
@@ -357,6 +407,16 @@ class Date(TimeInterval):
             type_2=SCALAR_TYPES_CLASS_REVERSE[cls],
         )
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        if pd.isnull(value):
+            return True
+        try:
+            check_date(value)
+        except Exception:
+            return False
+        return True
+
 
 class TimePeriod(TimeInterval):
     """ """
@@ -400,6 +460,16 @@ class TimePeriod(TimeInterval):
             type_2=SCALAR_TYPES_CLASS_REVERSE[cls],
         )
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        if pd.isnull(value):
+            return True
+        try:
+            check_time_period(value)
+        except Exception:
+            return False
+        return True
+
 
 class Duration(ScalarType):
     iso8601_duration_pattern = r"^P((\d+Y)?(\d+M)?(\d+D)?)$"
@@ -461,6 +531,16 @@ class Duration(ScalarType):
         total_days = years * 365 + months * 30 + days
         return int(total_days)
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        if pd.isnull(value):
+            return True
+
+        if isinstance(value, str):
+            match = re.match(cls.iso8601_duration_pattern, value)
+            return bool(match)
+        return False
+
 
 class Boolean(ScalarType):
     """ """
@@ -514,6 +594,14 @@ class Boolean(ScalarType):
             type_2=SCALAR_TYPES_CLASS_REVERSE[cls],
         )
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        if pd.isnull(value):
+            return True
+        if isinstance(value, str):
+            return value.lower() in {"true", "false", "1", "0"}
+        return isinstance(value, (int, float, bool))
+
 
 class Null(ScalarType):
     """ """
@@ -534,6 +622,10 @@ 
     def dtype(cls) -> str:
         return "string"
 
+    @classmethod
+    def check(cls, value: Any) -> bool:
+        return True
+
 
 SCALAR_TYPES: Dict[str, Type[ScalarType]] = {
     "String": String,
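Every scalar type now exposes a check classmethod that reports whether a value is valid for the type without raising; nulls are accepted by all of them, and the ScalarType fallback simply tries cast. A short sketch of the resulting behavior, derived from the predicates above:

    import pandas as pd

    from vtlengine.DataTypes import Boolean, Date, Duration, Integer, Number

    assert Integer.check("42") and not Integer.check("4.2")
    assert Number.check("4.2") and Number.check("true")      # bool literals pass as numbers
    assert Boolean.check("1") and not Boolean.check("yes")
    assert Duration.check("P1Y2M3D") and not Duration.check("PT1H")  # date parts only
    assert Date.check(None) and Date.check(pd.NA)            # nulls always pass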
vtlengine/{files/parser → DataTypes}/_time_checking.py
CHANGED

@@ -11,7 +11,7 @@ def check_date(value: str) -> str:
     Check if the date is in the correct format.
     """
     # Remove all whitespaces
-    value = value.replace(" ", "")
+    value = value.strip()
     try:
         if len(value) == 9 and value[7] == "-":
             value = value[:-1] + "0" + value[-1]
@@ -49,7 +49,7 @@ time_pattern = r"^" + date_pattern + r"/" + date_pattern + r"$"
 
 
 def check_time(value: str) -> str:
-    value = value.replace(" ", "")
+    value = value.strip()
     year_result = re.fullmatch(year_pattern, value)
     if year_result is not None:
         date1_time = datetime.strptime(value, "%Y")
@@ -94,7 +94,12 @@ further_options_period_pattern = (
 def check_time_period(value: str) -> str:
     if isinstance(value, int):
         value = str(value)
-    value = value.replace(" ", "")
+    value = value.strip()
+
+    match = re.fullmatch(r"^(\d{4})-(\d{2})$", value)
+    if match:
+        value = f"{match.group(1)}-M{match.group(2)}"
+
     period_result = re.fullmatch(period_pattern, value)
     if period_result is not None:
         result = TimePeriodHandler(value)
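Besides switching from full whitespace removal to strip(), check_time_period now normalises plain "YYYY-MM" input to the explicit monthly form before matching. A minimal sketch (return values omitted, since the exact canonical string comes from TimePeriodHandler):

    from vtlengine.DataTypes._time_checking import check_time_period

    check_time_period("2022-03")     # rewritten to "2022-M03" before pattern matching
    check_time_period(" 2022-M03 ")  # leading/trailing whitespace is stripped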
vtlengine/Exceptions/messages.py
CHANGED
@@ -17,6 +17,8 @@ centralised_messages = {
     "0-1-2-3": "Component {component} is duplicated.",
     "0-1-2-4": "Invalid json structure because {err} on file {filename}.",
     "0-1-2-5": "File {file} must be encoded in utf-8 (without BOM).",
+    "0-1-2-6": "Not found scalar {name} in datastructures",
+    "0-1-2-7": "Invalid value '{value}' for type {type_} {op_type} {name}.",
     # Run SDMX errors
     "0-1-3-1": "Expected exactly one input dataset in the whole script, found: {number_datasets}",
     "0-1-3-2": "SDMX Dataset {schema} requires to have a Schema object defined as structure",
@@ -44,6 +46,8 @@ centralised_messages = {
     "0-1-1-12": "On Dataset {name} loading: not possible to cast column {column} to {type}.",
     "0-1-1-13": "Invalid key on {field} field: {key}{closest_key}.",
     "0-1-1-14": "Empty datasets {dataset1} and {dataset2} shape missmatch.",
+    "0-1-1-15": "On Dataset {name} loading: Duplicated identifiers are not allowed, "
+    "found on row {row_index}",
     "0-1-0-1": " Trying to redefine input datasets {dataset}.",  # Semantic Error
     # ------------Operators-------------
     # General Semantic errors
@@ -51,6 +55,7 @@ centralised_messages = {
     "1-1-1-2": "Invalid implicit cast from {type_1} and {type_2} to {type_check}.",
     "1-1-1-3": "At op {op}: {entity} {name} cannot be promoted to {target_type}.",
     "1-1-1-4": "At op {op}: Operation not allowed for multimeasure datasets.",
+    "1-1-1-5": "At op {op}: Invalid type {type}.",
     "1-1-1-8": "At op {op}: Invalid Dataset {name}, no measures defined.",
     "1-1-1-9": "At op {op}: Invalid Dataset {name}, all measures must have the same type: {type}.",
     "1-1-1-10": "Component {comp_name} not found in Dataset {dataset_name}.",
@@ -240,6 +245,14 @@ centralised_messages = {
     "measure.",
     "2-1-19-15": "{op} can only be applied according to the iso 8601 format mask",
     "2-1-19-16": "{op} can only be positive numbers",
+    "2-1-19-17": "At op {op}: Time operators comparison are only support "
+    "= and <> comparison operations",
+    "2-1-19-18": "At op {op}: Time operators do not support < and > comparison operations, "
+    "so its not possible to use get the max or min between two time operators",
+    "2-1-19-19": "Time Period comparison (>, <, >=, <=) with different period indicator is not "
+    "supported, found {value1} {op} {value2}",
+    "2-1-19-20": "Time Period operands with different period indicators do not support < and > "
+    "comparison operations, unable to get the {op}",
     # ----------- Interpreter Common ------
     "2-3-1": "{comp_type} {comp_name} not found.",
     "2-3-2": "{op_type} cannot be used with {node_op} operators.",
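The new entries follow the file's existing convention: adjacent string literals concatenate, and placeholders are filled when the error is raised. A sketch of the lookup (the explicit format call is an assumption about how SemanticError renders these):

    from vtlengine.Exceptions.messages import centralised_messages

    msg = centralised_messages["2-1-19-19"].format(op="<", value1="2022-M01", value2="2022-Q1")
    # -> "Time Period comparison (>, <, >=, <=) with different period indicator
    #     is not supported, found 2022-M01 < 2022-Q1"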
vtlengine/Interpreter/__init__.py
CHANGED

@@ -151,6 +151,8 @@ class InterpreterAnalyzer(ASTTemplate):
     dprs: Optional[Dict[str, Optional[Dict[str, Any]]]] = None
     udos: Optional[Dict[str, Optional[Dict[str, Any]]]] = None
     hrs: Optional[Dict[str, Optional[Dict[str, Any]]]] = None
+    is_from_case_then: bool = False
+    signature_values: Optional[Dict[str, Any]] = None
 
     # **********************************
     # *                                *
@@ -1078,15 +1080,43 @@ class InterpreterAnalyzer(ASTTemplate):
 
         if self.condition_stack is None:
             self.condition_stack = []
+        if self.then_condition_dataset is None:
+            self.then_condition_dataset = []
+        if self.else_condition_dataset is None:
+            self.else_condition_dataset = []
 
-
-        case = node.cases.pop(0)
+        for case in node.cases:
             self.is_from_condition = True
-
+            cond = self.visit(case.condition)
             self.is_from_condition = False
-            thenOps.append(self.visit(case.thenOp))
 
-
+            conditions.append(cond)
+            if isinstance(cond, Scalar):
+                then_result = self.visit(case.thenOp)
+                thenOps.append(then_result)
+                continue
+
+            self.generate_then_else_datasets(copy(cond))
+
+            self.condition_stack.append(THEN_ELSE["then"])
+            self.is_from_if = True
+            self.is_from_case_then = True
+
+            then_result = self.visit(case.thenOp)
+            thenOps.append(then_result)
+
+            self.is_from_case_then = False
+            self.is_from_if = False
+            if len(self.condition_stack) > 0:
+                self.condition_stack.pop()
+            if len(self.then_condition_dataset) > 0:
+                self.then_condition_dataset.pop()
+            if len(self.else_condition_dataset) > 0:
+                self.else_condition_dataset.pop()
+
+        elseOp = self.visit(node.elseOp)
+
+        return Case.analyze(conditions, thenOps, elseOp)
 
     def visit_RenameNode(self, node: AST.RenameNode) -> Any:
         if self.udo_params is not None:
@@ -1575,11 +1605,10 @@ class InterpreterAnalyzer(ASTTemplate):
         if self.else_condition_dataset is None:
             self.else_condition_dataset = []
         if isinstance(condition, Dataset):
-            if (
-
-
-
-                raise ValueError("Only one boolean measure is allowed on condition dataset")
+            if len(condition.get_measures()) != 1:
+                raise SemanticError("1-1-1-4", op="condition")
+            if condition.get_measures()[0].data_type != BASIC_TYPES[bool]:
+                raise SemanticError("2-1-9-5", op="condition", name=condition.name)
             name = condition.get_measures_names()[0]
             if condition.data is None or condition.data.empty:
                 data = None
@@ -1589,7 +1618,7 @@ class InterpreterAnalyzer(ASTTemplate):
 
         else:
             if condition.data_type != BASIC_TYPES[bool]:
-                raise
+                raise SemanticError("2-1-9-4", op="condition", name=condition.name)
             name = condition.name
             data = None if condition.data is None else condition.data
 
@@ -1667,11 +1696,18 @@
         ):
             return left_operand, right_operand
 
-
-
-
-
-
+        if self.is_from_case_then:
+            merge_dataset = (
+                self.then_condition_dataset[-1]
+                if self.condition_stack[-1] == THEN_ELSE["then"]
+                else self.else_condition_dataset[-1]
+            )
+        else:
+            merge_dataset = (
+                self.then_condition_dataset.pop()
+                if self.condition_stack.pop() == THEN_ELSE["then"]
+                else (self.else_condition_dataset.pop())
+            )
 
         merge_index = merge_dataset.data[merge_dataset.get_measures_names()[0]].to_list()
         ids = merge_dataset.get_identifiers_names()
@@ -1826,6 +1862,8 @@
             raise SemanticError("2-3-10", comp_type="User Defined Operators")
         elif node.op not in self.udos:
             raise SemanticError("1-3-5", node_op=node.op, op_type="User Defined Operator")
+        if self.signature_values is None:
+            self.signature_values = {}
 
         operator = self.udos[node.op]
         signature_values = {}
@@ -1919,6 +1957,12 @@
             self.udo_params = []
 
         # Adding parameters to the stack
+        for k, v in signature_values.items():
+            if hasattr(v, "name"):
+                v = v.name  # type: ignore[assignment]
+            if v in self.signature_values:
+                signature_values[k] = self.signature_values[v]  # type: ignore[index]
+        self.signature_values.update(signature_values)
         self.udo_params.append(signature_values)
 
         # Calling the UDO AST, we use deepcopy to avoid changing the original UDO AST
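The rewritten visit_Case iterates over every when/then branch (the old code popped only the first case), pushing a then/else condition dataset per branch and unwinding it afterwards. A sketch of a multi-branch case expression this enables, assuming the package's top-level run helper (data structures and datapoints omitted):

    from vtlengine import run  # public API name assumed

    script = '''
    DS_r := DS_1[calc Me_2 :=
        case when Me_1 < 0 then "negative"
             when Me_1 = 0 then "zero"
             else "positive"];
    '''
    # run(script=script, data_structures=..., datapoints=...)  # evaluates each branch in turn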
vtlengine/Model/__init__.py
CHANGED
@@ -14,7 +14,7 @@ from pandas._testing import assert_frame_equal
 import vtlengine.DataTypes as DataTypes
 from vtlengine.DataTypes import SCALAR_TYPES, ScalarType
 from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
-from vtlengine.Exceptions import SemanticError
+from vtlengine.Exceptions import InputValidationException, SemanticError
 
 # from pyspark.pandas import DataFrame as SparkDataFrame, Series as SparkSeries
 
@@ -27,7 +27,28 @@ class Scalar:
 
     name: str
     data_type: Type[ScalarType]
-
+    _value: Any
+
+    def __init__(self, name: str, data_type: Type[ScalarType], value: Any) -> None:
+        self.name = name
+        self.data_type = data_type
+        self.value = value
+
+    @property
+    def value(self) -> Any:
+        return self._value
+
+    @value.setter
+    def value(self, new_value: Any) -> None:
+        if self.data_type and not self.data_type.check(new_value):
+            raise InputValidationException(
+                code="0-1-2-7",
+                value=new_value,
+                type_=self.data_type.__name__,
+                op_type=self.__class__.__name__,
+                name=self.name,
+            )
+        self._value = new_value
 
     @classmethod
     def from_json(cls, json_str: str) -> "Scalar":
@@ -368,7 +389,30 @@ class ScalarSet:
     """
 
     data_type: Type[ScalarType]
-
+    _values: List[Union[int, float, str, bool]]
+
+    def __init__(
+        self, data_type: Type[ScalarType], values: List[Union[int, float, str, bool]]
+    ) -> None:
+        self.data_type = data_type
+        self.values = values
+
+    @property
+    def values(self) -> List[Union[int, float, str, bool]]:
+        return self._values
+
+    @values.setter
+    def values(self, new_values: List[Union[int, float, str, bool]]) -> None:
+        for value in new_values:
+            if self.data_type and not self.data_type.check(value):
+                raise InputValidationException(
+                    code="0-1-2-7",
+                    value=value,
+                    type_=self.data_type.__name__,
+                    op_type=self.__class__.__name__,
+                    name="",
+                )
+        self._values = new_values
 
     def __contains__(self, item: str) -> Optional[bool]:
         if isinstance(item, float) and item.is_integer():
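Scalar and ScalarSet now funnel every assignment through the type's check predicate, so invalid values are rejected at construction and on later mutation alike. A short sketch using the constructor signatures visible in the hunks above:

    from vtlengine.DataTypes import Integer
    from vtlengine.Exceptions import InputValidationException
    from vtlengine.Model import Scalar

    s = Scalar(name="sc_1", data_type=Integer, value="42")  # passes Integer.check
    try:
        s.value = "not a number"  # setter re-validates
    except InputValidationException as e:
        print(e)  # rendered from message 0-1-2-7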
vtlengine/Operators/Aggregation.py
CHANGED

@@ -72,7 +72,7 @@ class Aggregation(Operator.Unary):
                 new_value = ["9999-99-99"]
             else:
                 to_replace = ["9999-99-99"]
-            data[measure.name] = data[measure.name].replace(to_replace, new_value)
+            data[measure.name] = data[measure.name].replace(to_replace, new_value)  # type: ignore[arg-type, unused-ignore]
         elif measure.data_type == TimePeriod:
             if mode == "input":
                 data[measure.name] = (
@@ -80,6 +80,10 @@
                     .astype(object)
                     .map(lambda x: TimePeriodHandler(str(x)), na_action="ignore")
                 )
+                if cls.op in [MAX, MIN]:
+                    indicators = {v.period_indicator for v in data[measure.name].dropna()}
+                    if len(indicators) > 1:
+                        raise SemanticError("2-1-19-20", op=cls.op)
             else:
                 data[measure.name] = data[measure.name].map(
                     lambda x: str(x), na_action="ignore"
@@ -99,7 +103,7 @@
                     lambda x: str(x), na_action="ignore"
                 )
         elif measure.data_type == String:
-            data[measure.name] = data[measure.name].replace(to_replace, new_value)
+            data[measure.name] = data[measure.name].replace(to_replace, new_value)  # type: ignore[arg-type, unused-ignore]
         elif measure.data_type == Duration:
             if mode == "input":
                 data[measure.name] = data[measure.name].map(
@@ -259,6 +263,10 @@
         result_df = result_df[grouping_keys + measure_names]
         if cls.op == COUNT:
             result_df = result_df.dropna(subset=measure_names, how="any")
+        if cls.op in [MAX, MIN]:
+            for measure in operand.get_measures():
+                if measure.data_type == TimeInterval:
+                    raise SemanticError("2-1-19-18", op=cls.op)
         cls._handle_data_types(result_df, operand.get_measures(), "input")
         result_df = cls._agg_func(result_df, grouping_keys, measure_names, having_expr)
 
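These guards make min/max fail fast on time types that have no total order after the TimeHandling changes: any time_interval measure (error 2-1-19-18), and time_period measures whose values mix period indicators (error 2-1-19-20). A sketch mirroring (not importing) the indicator guard:

    from vtlengine.DataTypes.TimeHandling import TimePeriodHandler

    values = [TimePeriodHandler("2022-M01"), TimePeriodHandler("2022-Q1")]
    indicators = {v.period_indicator for v in values}
    if len(indicators) > 1:  # {"M", "Q"} here
        print("max/min undefined across period indicators:", indicators)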