vtlengine 1.4.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. vtlengine/API/_InternalApi.py +791 -0
  2. vtlengine/API/__init__.py +612 -0
  3. vtlengine/API/data/schema/external_routines_schema.json +34 -0
  4. vtlengine/API/data/schema/json_schema_2.1.json +116 -0
  5. vtlengine/API/data/schema/value_domain_schema.json +97 -0
  6. vtlengine/AST/ASTComment.py +57 -0
  7. vtlengine/AST/ASTConstructor.py +598 -0
  8. vtlengine/AST/ASTConstructorModules/Expr.py +1928 -0
  9. vtlengine/AST/ASTConstructorModules/ExprComponents.py +995 -0
  10. vtlengine/AST/ASTConstructorModules/Terminals.py +790 -0
  11. vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
  12. vtlengine/AST/ASTDataExchange.py +10 -0
  13. vtlengine/AST/ASTEncoders.py +32 -0
  14. vtlengine/AST/ASTString.py +675 -0
  15. vtlengine/AST/ASTTemplate.py +558 -0
  16. vtlengine/AST/ASTVisitor.py +25 -0
  17. vtlengine/AST/DAG/__init__.py +479 -0
  18. vtlengine/AST/DAG/_words.py +10 -0
  19. vtlengine/AST/Grammar/Vtl.g4 +705 -0
  20. vtlengine/AST/Grammar/VtlTokens.g4 +409 -0
  21. vtlengine/AST/Grammar/__init__.py +0 -0
  22. vtlengine/AST/Grammar/lexer.py +2139 -0
  23. vtlengine/AST/Grammar/parser.py +16597 -0
  24. vtlengine/AST/Grammar/tokens.py +169 -0
  25. vtlengine/AST/VtlVisitor.py +824 -0
  26. vtlengine/AST/__init__.py +674 -0
  27. vtlengine/DataTypes/TimeHandling.py +562 -0
  28. vtlengine/DataTypes/__init__.py +863 -0
  29. vtlengine/DataTypes/_time_checking.py +135 -0
  30. vtlengine/Exceptions/__exception_file_generator.py +96 -0
  31. vtlengine/Exceptions/__init__.py +159 -0
  32. vtlengine/Exceptions/messages.py +1004 -0
  33. vtlengine/Interpreter/__init__.py +2048 -0
  34. vtlengine/Model/__init__.py +501 -0
  35. vtlengine/Operators/Aggregation.py +357 -0
  36. vtlengine/Operators/Analytic.py +455 -0
  37. vtlengine/Operators/Assignment.py +23 -0
  38. vtlengine/Operators/Boolean.py +106 -0
  39. vtlengine/Operators/CastOperator.py +451 -0
  40. vtlengine/Operators/Clause.py +366 -0
  41. vtlengine/Operators/Comparison.py +488 -0
  42. vtlengine/Operators/Conditional.py +495 -0
  43. vtlengine/Operators/General.py +191 -0
  44. vtlengine/Operators/HROperators.py +254 -0
  45. vtlengine/Operators/Join.py +447 -0
  46. vtlengine/Operators/Numeric.py +422 -0
  47. vtlengine/Operators/RoleSetter.py +77 -0
  48. vtlengine/Operators/Set.py +176 -0
  49. vtlengine/Operators/String.py +578 -0
  50. vtlengine/Operators/Time.py +1144 -0
  51. vtlengine/Operators/Validation.py +275 -0
  52. vtlengine/Operators/__init__.py +900 -0
  53. vtlengine/Utils/__Virtual_Assets.py +34 -0
  54. vtlengine/Utils/__init__.py +479 -0
  55. vtlengine/__extras_check.py +17 -0
  56. vtlengine/__init__.py +27 -0
  57. vtlengine/files/__init__.py +0 -0
  58. vtlengine/files/output/__init__.py +35 -0
  59. vtlengine/files/output/_time_period_representation.py +55 -0
  60. vtlengine/files/parser/__init__.py +240 -0
  61. vtlengine/files/parser/_rfc_dialect.py +22 -0
  62. vtlengine/py.typed +0 -0
  63. vtlengine-1.4.0rc2.dist-info/METADATA +89 -0
  64. vtlengine-1.4.0rc2.dist-info/RECORD +66 -0
  65. vtlengine-1.4.0rc2.dist-info/WHEEL +4 -0
  66. vtlengine-1.4.0rc2.dist-info/licenses/LICENSE.md +661 -0
vtlengine/Operators/Analytic.py
@@ -0,0 +1,455 @@
+from copy import copy
+from typing import List, Optional
+
+import duckdb
+
+# if os.environ.get("SPARK"):
+#     import pyspark.pandas as pd
+# else:
+#     import pandas as pd
+import pandas as pd
+
+import vtlengine.Operators as Operator
+from vtlengine.AST import OrderBy, Windowing
+from vtlengine.AST.Grammar.tokens import (
+    AVG,
+    COUNT,
+    FIRST_VALUE,
+    LAG,
+    LAST_VALUE,
+    LEAD,
+    MAX,
+    MEDIAN,
+    MIN,
+    RANK,
+    RATIO_TO_REPORT,
+    STDDEV_POP,
+    STDDEV_SAMP,
+    SUM,
+    VAR_POP,
+    VAR_SAMP,
+)
+from vtlengine.DataTypes import (
+    COMP_NAME_MAPPING,
+    Integer,
+    Number,
+    unary_implicit_promotion,
+)
+from vtlengine.Exceptions import SemanticError
+from vtlengine.Model import Component, Dataset, Role
+from vtlengine.Utils.__Virtual_Assets import VirtualCounter
+
+return_integer_operators = [MAX, MIN, SUM]
+
+
+# noinspection PyMethodOverriding
+class Analytic(Operator.Unary):
+    """
+    Analytic class
+
+    Class that inherits from Unary.
+
+    Class methods:
+        validate: Validates the Dataset.
+        analyticfunc: Class method that builds the analytic SQL query and
+            returns a dataframe using the duckdb library.
+        evaluate: Ensures the type of data is the correct one to perform the
+            analytic operators.
+    """
+
+    return_integer = None
+    sql_op: Optional[str] = None
+
+    @classmethod
+    def validate(  # type: ignore[override]  # noqa: C901
+        cls,
+        operand: Dataset,
+        partitioning: List[str],
+        ordering: Optional[List[OrderBy]],
+        window: Optional[Windowing],
+        params: Optional[List[int]],
+        component_name: Optional[str] = None,
+    ) -> Dataset:
+        order_components = [] if ordering is None else [o.component for o in ordering]
+        identifier_names = operand.get_identifiers_names()
+        result_components = operand.components.copy()
+
+        for comp_name in partitioning:
+            if comp_name not in operand.components:
+                raise SemanticError(
+                    "1-1-1-10",
+                    op=cls.op,
+                    comp_name=comp_name,
+                    dataset_name=operand.name,
+                )
+            if comp_name not in identifier_names:
+                raise SemanticError(
+                    "1-1-3-2",
+                    op=cls.op,
+                    id_name=comp_name,
+                    id_type=operand.components[comp_name].role,
+                )
+        for comp_name in order_components:
+            if comp_name not in operand.components:
+                raise SemanticError(
+                    "1-1-1-10",
+                    op=cls.op,
+                    comp_name=comp_name,
+                    dataset_name=operand.name,
+                )
+        if component_name is not None:
+            if cls.type_to_check is not None:
+                unary_implicit_promotion(
+                    operand.components[component_name].data_type, cls.type_to_check
+                )
+
+            if cls.op in return_integer_operators:
+                cls.return_integer = isinstance(cls.return_type, Integer)
+
+            elif cls.return_type is not None:
+                result_components[component_name] = Component(
+                    name=component_name,
+                    data_type=cls.return_type,
+                    role=operand.components[component_name].role,
+                    nullable=operand.components[component_name].nullable,
+                )
+            if cls.op == COUNT:
+                measure_name = COMP_NAME_MAPPING[cls.return_type]
+                result_components[measure_name] = Component(
+                    name=measure_name,
+                    data_type=cls.return_type,
+                    role=Role.MEASURE,
+                    nullable=operand.components[component_name].nullable,
+                )
+                if component_name in result_components:
+                    del result_components[component_name]
+        else:
+            measures = operand.get_measures()
+            if len(measures) == 0:
+                raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
+
+            if cls.op in return_integer_operators:
+                is_number = False
+                for measure in measures:
+                    is_number |= isinstance(measure.data_type, Number)
+                cls.return_integer = not is_number
+
+            if cls.type_to_check is not None:
+                for measure in measures:
+                    unary_implicit_promotion(measure.data_type, cls.type_to_check)
+
+            if cls.op in return_integer_operators:
+                for measure in measures:
+                    new_measure = copy(measure)
+                    new_measure.data_type = Integer if cls.return_integer else Number
+                    result_components[measure.name] = new_measure
+            elif cls.return_type is not None:
+                for measure in measures:
+                    new_measure = copy(measure)
+                    new_measure.data_type = cls.return_type
+                    result_components[measure.name] = new_measure
+
+            if cls.op == COUNT and len(measures) <= 1:
+                measure_name = COMP_NAME_MAPPING[cls.return_type]
+                nullable = False if len(measures) == 0 else measures[0].nullable
+                if len(measures) == 1:
+                    del result_components[measures[0].name]
+                result_components[measure_name] = Component(
+                    name=measure_name,
+                    data_type=cls.return_type,
+                    role=Role.MEASURE,
+                    nullable=nullable,
+                )
+        dataset_name = VirtualCounter._new_ds_name()
+        return Dataset(name=dataset_name, components=result_components, data=None)
+
+    @classmethod
+    def analyticfunc(
+        cls,
+        df: pd.DataFrame,
+        partitioning: List[str],
+        identifier_names: List[str],
+        measure_names: List[str],
+        ordering: List[OrderBy],
+        window: Optional[Windowing],
+        params: Optional[List[int]] = None,
+    ) -> pd.DataFrame:
+        """Builds and runs the analytic SQL query.
+
+        It assembles the OVER clause (partitioning, ordering and windowing)
+        from the attributes specified below, ensuring that the type of data
+        is the correct one to perform the operation.
+
+        Attributes:
+            identifier_names: List with the identifier names.
+            measure_names: List with the measure names.
+            ordering: List with the ordering modes.
+            window: Windowing clause with its mode and start/stop bounds.
+            params: Extra operator parameters (used by lag and lead).
+        """
+        # Windowing
+        window_str = ""
+        if window is not None:
+            mode = "ROWS" if window.type_ == "data" else "RANGE"
+            start_mode = (
+                window.start_mode.upper()
+                if (isinstance(window.start, int) and window.start != 0)
+                or (isinstance(window.start, str) and window.start == "unbounded")
+                else ""
+            )
+            stop_mode = (
+                window.stop_mode.upper()
+                if (isinstance(window.stop, int) and window.stop != 0)
+                or (isinstance(window.stop, str) and window.stop == "unbounded")
+                else ""
+            )
+            start = (
+                "UNBOUNDED"
+                if window.start == "unbounded" or window.start == -1
+                else str(window.start)
+            )
+            stop = (
+                "CURRENT ROW" if window.stop == "current" or window.stop == 0 else str(window.stop)
+            )
+            window_str = f"{mode} BETWEEN {start} {start_mode} AND {stop} {stop_mode}"
+
+        # Partitioning
+        partition = "PARTITION BY " + ", ".join(partitioning) if len(partitioning) > 0 else ""
+
+        # Ordering
+        order_str = ""
+        if len(ordering) > 0:
+            for x in ordering:
+                order_str += f"{x.component} {x.order}, "
+            if len(order_str) > 0:
+                order_str = "ORDER BY " + order_str[:-2]
+
+        # Generating the complete analytic string
+        analytic_str = f"OVER ( {partition} {order_str} {window_str})"
+
+        measure_queries = []
+        for measure in measure_names:
+            if cls.op == RANK:
+                measure_query = f"{cls.sql_op}()"
+            elif cls.op == RATIO_TO_REPORT:
+                measure_query = f"CAST({measure} AS DOUBLE) / SUM(CAST({measure} AS DOUBLE))"
+            elif cls.op in [LAG, LEAD]:
+                measure_query = f"{cls.sql_op}({measure}, {','.join(map(str, params or []))})"
+            else:
+                measure_query = f"{cls.sql_op}({measure})"
+            if cls.op == COUNT and len(measure_names) == 1:
+                measure_query += f" {analytic_str} as {COMP_NAME_MAPPING[cls.return_type]}"
+            elif cls.op in return_integer_operators and cls.return_integer:
+                measure_query = f"CAST({measure_query} {analytic_str} AS INTEGER) as {measure}"
+            else:
+                measure_query += f" {analytic_str} as {measure}"
+            measure_queries.append(measure_query)
+        if cls.op == COUNT and len(measure_names) == 0:
+            measure_queries.append(
+                f"COUNT(*) {analytic_str} as {COMP_NAME_MAPPING[cls.return_type]}"
+            )
+
+        measures_sql = ", ".join(measure_queries)
+        identifiers_sql = ", ".join(identifier_names)
+        query = f"SELECT {identifiers_sql} , {measures_sql} FROM df"
+
+        if cls.op == COUNT:
+            df[measure_names] = df[measure_names].fillna(-1)
+        # if os.getenv("SPARK", False):
+        #     df = df.to_pandas()
+        return duckdb.query(query).to_df().astype(object)
+
+    @classmethod
+    def evaluate(  # type: ignore[override]
+        cls,
+        operand: Dataset,
+        partitioning: List[str],
+        ordering: Optional[List[OrderBy]],
+        window: Optional[Windowing],
+        params: Optional[List[int]],
+        component_name: Optional[str] = None,
+    ) -> Dataset:
+        result = cls.validate(operand, partitioning, ordering, window, params, component_name)
+        df = operand.data.copy() if operand.data is not None else pd.DataFrame()
+        identifier_names = operand.get_identifiers_names()
+
+        if component_name is not None:
+            measure_names = [component_name]
+        else:
+            measure_names = operand.get_measures_names()
+
+        result.data = cls.analyticfunc(
+            df=df,
+            partitioning=partitioning,
+            identifier_names=identifier_names,
+            measure_names=measure_names,
+            ordering=ordering or [],
+            window=window,
+            params=params,
+        )
+
+        # if cls.return_type == Integer:
+        #     result.data[measure_names] = result.data[measure_names].astype('Int64')
+
+        return result
+
+
+class Max(Analytic):
+    """
+    Max operator
+    """
+
+    op = MAX
+    sql_op = "MAX"
+    return_integer = False
+
+
+class Min(Analytic):
+    """
+    Min operator
+    """
+
+    op = MIN
+    sql_op = "MIN"
+    return_integer = False
+
+
+class Sum(Analytic):
+    """
+    Sum operator
+    """
+
+    op = SUM
+    sql_op = "SUM"
+    return_integer = False
+
+
+class Count(Analytic):
+    """
+    Count operator
+    """
+
+    op = COUNT
+    type_to_check = None
+    return_type = Integer
+    sql_op = "COUNT"
+
+
+class Avg(Analytic):
+    """
+    Average operator
+    """
+
+    op = AVG
+    type_to_check = Number
+    return_type = Number
+    sql_op = "AVG"
+
+
+class Median(Analytic):
+    """
+    Median operator
+    """
+
+    op = MEDIAN
+    type_to_check = Number
+    return_type = Number
+    sql_op = "MEDIAN"
+
+
+class PopulationStandardDeviation(Analytic):
+    """
+    Population standard deviation operator
+    """
+
+    op = STDDEV_POP
+    type_to_check = Number
+    return_type = Number
+    sql_op = "STDDEV_POP"
+
+
+class SampleStandardDeviation(Analytic):
+    """
+    Sample standard deviation operator.
+    """
+
+    op = STDDEV_SAMP
+    type_to_check = Number
+    return_type = Number
+    sql_op = "STDDEV_SAMP"
+
+
+class PopulationVariance(Analytic):
+    """
+    Population variance operator
+    """
+
+    op = VAR_POP
+    type_to_check = Number
+    return_type = Number
+    sql_op = "VAR_POP"
+
+
+class SampleVariance(Analytic):
+    """
+    Sample variance operator
+    """
+
+    op = VAR_SAMP
+    type_to_check = Number
+    return_type = Number
+    sql_op = "VAR_SAMP"
+
+
+class FirstValue(Analytic):
+    """
+    First value operator
+    """
+
+    op = FIRST_VALUE
+    sql_op = "FIRST"
+
+
+class LastValue(Analytic):
+    """
+    Last value operator
+    """
+
+    op = LAST_VALUE
+    sql_op = "LAST"
+
+
+class Lag(Analytic):
+    """
+    Lag operator
+    """
+
+    op = LAG
+    sql_op = "LAG"
+
+
+class Lead(Analytic):
+    """
+    Lead operator
+    """
+
+    op = LEAD
+    sql_op = "LEAD"
+
+
+class Rank(Analytic):
+    """
+    Rank operator
+    """
+
+    op = RANK
+    sql_op = "RANK"
+    return_type = Integer
+
+
+class RatioToReport(Analytic):
+    """
+    Ratio to report operator
+    """
+
+    op = RATIO_TO_REPORT
+    type_to_check = Number
+    return_type = Number
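
Every analytic operator above is ultimately reduced by analyticfunc to a single duckdb window query over the operand's pandas dataframe. A minimal standalone sketch of that query shape (the Id_1/Me_1 data below is hypothetical, not from the package):

import duckdb
import pandas as pd

df = pd.DataFrame(
    {
        "Id_1": ["A", "A", "B", "B"],  # identifier used for partitioning
        "Me_1": [1, 2, 3, 4],          # measure the operator is applied to
    }
)

# Roughly the SQL Sum.analyticfunc emits for
# "sum ( Me_1 over ( partition by Id_1 ) )".
query = "SELECT Id_1 , SUM(Me_1) OVER ( PARTITION BY Id_1 ) as Me_1 FROM df"

# duckdb resolves the table name "df" against the local dataframe;
# astype(object) mirrors what the method returns.
print(duckdb.query(query).to_df().astype(object))
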
vtlengine/Operators/Assignment.py
@@ -0,0 +1,23 @@
+from typing import Any, Union
+
+from vtlengine.Exceptions import SemanticError
+from vtlengine.Model import DataComponent, Dataset
+from vtlengine.Operators import Binary
+
+ALL_MODEL_TYPES = Union[DataComponent, Dataset]
+
+
+class Assignment(Binary):
+    @classmethod
+    def validate(cls, left_operand: Any, right_operand: Any) -> ALL_MODEL_TYPES:
+        if (
+            isinstance(right_operand, DataComponent)
+            and str(right_operand.role) == "IDENTIFIER"
+        ):
+            raise SemanticError("1-1-6-13", op=cls.op, comp_name=right_operand.name)
+        right_operand.name = left_operand
+        return right_operand
+
+    @classmethod
+    def evaluate(cls, left_operand: Any, right_operand: Any) -> ALL_MODEL_TYPES:
+        return cls.validate(left_operand, right_operand)
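
Assignment does no computation of its own: it rejects identifier components on the right-hand side and rebinds the already-evaluated result to the left-hand name. A small sketch of that behaviour, assuming the wheel is installed (the dataset here is hypothetical):

from vtlengine.DataTypes import Integer
from vtlengine.Model import Component, Dataset, Role
from vtlengine.Operators.Assignment import Assignment

ds = Dataset(
    name="tmp",
    components={
        "Id_1": Component(name="Id_1", data_type=Integer, role=Role.IDENTIFIER, nullable=False),
    },
    data=None,
)

# Equivalent of "DS_r := <expr>" once <expr> has been evaluated to ds.
result = Assignment.evaluate("DS_r", ds)
assert result is ds and result.name == "DS_r"
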
vtlengine/Operators/Boolean.py
@@ -0,0 +1,106 @@
+# if os.environ.get("SPARK", False):
+#     import pyspark.pandas as pd
+# else:
+#     import pandas as pd
+from typing import Any, Optional
+
+import pandas as pd
+
+import vtlengine.Operators as Operator
+from vtlengine.AST.Grammar.tokens import AND, NOT, OR, XOR
+from vtlengine.DataTypes import Boolean
+
+
+class Unary(Operator.Unary):
+    type_to_check = Boolean
+    return_type = Boolean
+
+
+class Binary(Operator.Binary):
+    type_to_check = Boolean
+    return_type = Boolean
+    comp_op: Any = None
+
+    @classmethod
+    def apply_operation_series_scalar(cls, series: Any, scalar: Any, series_left: bool) -> Any:
+        if series_left:
+            return series.map(lambda x: cls.py_op(x, scalar))
+        else:
+            return series.map(lambda x: cls.py_op(scalar, x))
+
+    @classmethod
+    def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
+        result = cls.comp_op(left_series.astype("boolean"), right_series.astype("boolean"))
+        return result.replace({pd.NA: None}).astype(object)
+
+    @classmethod
+    def op_func(cls, x: Optional[bool], y: Optional[bool]) -> Optional[bool]:
+        return cls.py_op(x, y)
+
+
+class And(Binary):
+    op = AND
+    comp_op = pd.Series.__and__
+
+    @staticmethod
+    # @numba.njit
+    def py_op(x: Optional[bool], y: Optional[bool]) -> Optional[bool]:
+        if (x is None and y == False) or (x == False and y is None):
+            return False
+        elif x is None or y is None:
+            return None
+        return x and y
+
+    # @classmethod
+    # def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
+    #     return x & y
+
+
+class Or(Binary):
+    op = OR
+    comp_op = pd.Series.__or__
+
+    @staticmethod
+    # @numba.njit
+    def py_op(x: Optional[bool], y: Optional[bool]) -> Optional[bool]:
+        if (x is None and y == True) or (x == True and y is None):
+            return True
+        elif x is None or y is None:
+            return None
+        return x or y
+
+    # @classmethod
+    # def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
+    #     return x | y
+
+
+class Xor(Binary):
+    op = XOR
+    comp_op = pd.Series.__xor__
+
+    @classmethod
+    def py_op(cls, x: Optional[bool], y: Optional[bool]) -> Optional[bool]:
+        if pd.isnull(x) or pd.isnull(y):
+            return None
+        return (x and not y) or (not x and y)
+
+    # @classmethod
+    # def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
+    #     return x ^ y
+
+
+class Not(Unary):
+    op = NOT
+
+    @staticmethod
+    # @numba.njit
+    def py_op(x: Optional[bool]) -> Optional[bool]:
+        return None if x is None else not x
+
+    # @classmethod
+    # def spark_op(cls, series: pd.Series) -> pd.Series:
+    #     return ~series
+
+    @classmethod
+    def apply_operation_component(cls, series: Any) -> Any:
+        return series.map(lambda x: not x, na_action="ignore")
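
These py_op implementations follow VTL's three-valued logic: null propagates unless the known operand already decides the result. A quick check of that behaviour, assuming the wheel is installed:

from vtlengine.Operators.Boolean import And, Not, Or, Xor

assert And.py_op(None, False) is False  # false absorbs null
assert And.py_op(None, True) is None    # result depends on the null
assert Or.py_op(None, True) is True     # true absorbs null
assert Or.py_op(None, False) is None    # result depends on the null
assert Xor.py_op(True, None) is None    # xor can never absorb null
assert Not.py_op(None) is None          # negation of null is null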