vtlengine 1.0-py3-none-any.whl → 1.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- vtlengine/API/_InternalApi.py +159 -102
- vtlengine/API/__init__.py +110 -68
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +402 -205
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +248 -104
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +24 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/Vtl.g4 +49 -20
- vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
- vtlengine/AST/Grammar/lexer.py +2012 -1312
- vtlengine/AST/Grammar/parser.py +7524 -4343
- vtlengine/AST/Grammar/tokens.py +140 -128
- vtlengine/AST/VtlVisitor.py +16 -5
- vtlengine/AST/__init__.py +41 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +196 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +96 -27
- vtlengine/Exceptions/messages.py +149 -69
- vtlengine/Interpreter/__init__.py +817 -497
- vtlengine/Model/__init__.py +172 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +167 -79
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +290 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +129 -46
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +467 -215
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +232 -41
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +79 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +48 -37
- vtlengine-1.0.2.dist-info/METADATA +245 -0
- vtlengine-1.0.2.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/LICENSE.md +0 -0
vtlengine/Operators/Analytic.py
CHANGED
@@ -1,4 +1,3 @@
-import os
 from copy import copy
 from typing import List, Optional
 
@@ -6,21 +5,33 @@ import duckdb
 
 from vtlengine.Exceptions import SemanticError
 
-if os.environ.get("SPARK"):
-    import pyspark.pandas as pd
-else:
-    import pandas as pd
+# if os.environ.get("SPARK"):
+#     import pyspark.pandas as pd
+# else:
+#     import pandas as pd
+import pandas as pd
 
 import vtlengine.Operators as Operator
 from vtlengine.AST import OrderBy, Windowing
-from vtlengine.AST.Grammar.tokens import
+from vtlengine.AST.Grammar.tokens import (
+    AVG,
+    COUNT,
+    FIRST_VALUE,
+    LAG,
+    LAST_VALUE,
+    LEAD,
+    MAX,
+    MEDIAN,
+    MIN,
+    RANK,
+    RATIO_TO_REPORT,
+    STDDEV_POP,
+    STDDEV_SAMP,
+    SUM,
+    VAR_POP,
+    VAR_SAMP,
+)
+from vtlengine.DataTypes import COMP_NAME_MAPPING, Integer, Number, unary_implicit_promotion
 from vtlengine.Model import Component, Dataset, Role
 
 
@@ -36,14 +47,19 @@ class Analytic(Operator.Unary):
    analyticfunc: Specify class method that returns a dataframe using the duckdb library.
    Evaluate: Ensures the type of data is the correct one to perform the Analytic operators.
    """
-
+
+    sql_op: Optional[str] = None
 
     @classmethod
-    def validate(
+    def validate(  # type: ignore[override]
+        cls,
+        operand: Dataset,
+        partitioning: List[str],
+        ordering: Optional[List[OrderBy]],
+        window: Optional[Windowing],
+        params: Optional[List[int]],
+        component_name: Optional[str] = None,
+    ) -> Dataset:
         if ordering is None:
             order_components = []
         else:
@@ -53,50 +69,84 @@ class Analytic(Operator.Unary):
 
         for comp_name in partitioning:
             if comp_name not in operand.components:
-                raise SemanticError(
+                raise SemanticError(
+                    "1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=operand.name
+                )
             if comp_name not in identifier_names:
-                raise SemanticError(
+                raise SemanticError(
+                    "1-1-3-2",
+                    op=cls.op,
+                    id_name=comp_name,
+                    id_type=operand.components[comp_name].role,
+                )
         for comp_name in order_components:
             if comp_name not in operand.components:
-                raise SemanticError(
-        if
+                raise SemanticError(
+                    "1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=operand.name
+                )
+        if component_name is not None:
+            if cls.type_to_check is not None:
+                unary_implicit_promotion(
+                    operand.components[component_name].data_type, cls.type_to_check
+                )
+            if cls.return_type is not None:
+                result_components[component_name] = Component(
+                    name=component_name,
+                    data_type=cls.return_type,
+                    role=operand.components[component_name].role,
+                    nullable=operand.components[component_name].nullable,
+                )
+            if cls.op == COUNT:
+                measure_name = COMP_NAME_MAPPING[cls.return_type]
+                result_components[measure_name] = Component(
+                    name=measure_name,
+                    data_type=cls.return_type,
+                    role=Role.MEASURE,
+                    nullable=operand.components[component_name].nullable,
+                )
+                if component_name in result_components:
+                    del result_components[component_name]
+        else:
+            measures = operand.get_measures()
+            if len(measures) == 0:
+                raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
+            if cls.type_to_check is not None:
+                for measure in measures:
+                    unary_implicit_promotion(measure.data_type, cls.type_to_check)
+            if cls.return_type is not None:
+                for measure in measures:
+                    new_measure = copy(measure)
+                    new_measure.data_type = cls.return_type
+                    result_components[measure.name] = new_measure
+            if cls.op == COUNT and len(measures) <= 1:
+                measure_name = COMP_NAME_MAPPING[cls.return_type]
+                nullable = False if len(measures) == 0 else measures[0].nullable
+                if len(measures) == 1:
+                    del result_components[measures[0].name]
+                result_components[measure_name] = Component(
+                    name=measure_name,
+                    data_type=cls.return_type,
+                    role=Role.MEASURE,
+                    nullable=nullable,
+                )
 
         return Dataset(name="result", components=result_components, data=None)
 
     @classmethod
-    def analyticfunc(
+    def analyticfunc(
+        cls,
+        df: pd.DataFrame,
+        partitioning: List[str],
+        identifier_names: List[str],
+        measure_names: List[str],
+        ordering: List[OrderBy],
+        window: Optional[Windowing],
+        params: Optional[List[int]] = None,
+    ) -> pd.DataFrame:
         """Annotation class
 
-        It is used to analyze the attributes specified bellow
+        It is used to analyze the attributes specified bellow
+        ensuring that the type of data is the correct one to perform
         the operation.
 
         Attributes:
@@ -110,18 +160,26 @@ class Analytic(Operator.Unary):
         window_str = ""
         if window is not None:
             mode = "ROWS" if window.type_ == "data" else "RANGE"
-            start_mode =
+            start_mode = (
+                window.start_mode
+                if window.start_mode != "current" and window.start != "CURRENT ROW"
+                else ""
+            )
+            stop_mode = (
+                window.stop_mode
+                if window.stop_mode != "current" and window.stop != "CURRENT ROW"
+                else ""
+            )
+            if isinstance(window.start, int) and window.start == -1:
+                window.start = "UNBOUNDED"
 
-            if stop_mode ==
-                window.stop =
+            if stop_mode == "" and window.stop == 0:
+                window.stop = "CURRENT ROW"
             window_str = f"{mode} BETWEEN {window.start} {start_mode} AND {window.stop} {stop_mode}"
 
         # Partitioning
         if len(partitioning) > 0:
-            partition = "PARTITION BY " +
+            partition = "PARTITION BY " + ", ".join(partitioning)
         else:
             partition = ""
 
@@ -143,7 +201,7 @@ class Analytic(Operator.Unary):
         elif cls.op == RATIO_TO_REPORT:
             measure_query = f"CAST({measure} AS REAL) / SUM(CAST({measure} AS REAL))"
         elif cls.op in [LAG, LEAD]:
-            measure_query = f"{cls.sql_op}({measure}, {','.join(map(str, params))})"
+            measure_query = f"{cls.sql_op}({measure}, {','.join(map(str, params or []))})"
         else:
             measure_query = f"{cls.sql_op}({measure})"
         if cls.op == COUNT and len(measure_names) == 1:
@@ -153,33 +211,47 @@ class Analytic(Operator.Unary):
             measure_queries.append(measure_query)
         if cls.op == COUNT and len(measure_names) == 0:
             measure_queries.append(
-                f"COUNT(*) {analytic_str} as {COMP_NAME_MAPPING[cls.return_type]}"
+                f"COUNT(*) {analytic_str} as {COMP_NAME_MAPPING[cls.return_type]}"
+            )
 
-        measures_sql =
-        identifiers_sql =
+        measures_sql = ", ".join(measure_queries)
+        identifiers_sql = ", ".join(identifier_names)
         query = f"SELECT {identifiers_sql} , {measures_sql} FROM df"
 
         if cls.op == COUNT:
             df[measure_names] = df[measure_names].fillna(-1)
-        if os.getenv("SPARK", False):
-            df = df.to_pandas()
+        # if os.getenv("SPARK", False):
+        #     df = df.to_pandas()
         return duckdb.query(query).to_df()
 
     @classmethod
-    def evaluate(
+    def evaluate(  # type: ignore[override]
+        cls,
+        operand: Dataset,
+        partitioning: List[str],
+        ordering: Optional[List[OrderBy]],
+        window: Optional[Windowing],
+        params: Optional[List[int]],
+        component_name: Optional[str] = None,
+    ) -> Dataset:
+        result = cls.validate(operand, partitioning, ordering, window, params, component_name)
+        df = operand.data.copy() if operand.data is not None else pd.DataFrame()
        identifier_names = operand.get_identifiers_names()
 
-
+        if component_name is not None:
+            measure_names = [component_name]
+        else:
+            measure_names = operand.get_measures_names()
+
+        result.data = cls.analyticfunc(
+            df=df,
+            partitioning=partitioning,
+            identifier_names=identifier_names,
+            measure_names=measure_names,
+            ordering=ordering or [],
+            window=window,
+            params=params,
+        )
         return result
 
 
@@ -187,6 +259,7 @@ class Max(Analytic):
     """
     Max operator
     """
+
     op = MAX
     sql_op = "MAX"
 
@@ -195,6 +268,7 @@ class Min(Analytic):
     """
     Min operator
     """
+
     op = MIN
     sql_op = "MIN"
 
@@ -203,6 +277,7 @@ class Sum(Analytic):
     """
     Sum operator
     """
+
     op = SUM
     type_to_check = Number
     return_type = Number
@@ -213,6 +288,7 @@ class Count(Analytic):
     """
     Count operator
     """
+
     op = COUNT
     type_to_check = None
     return_type = Integer
@@ -223,6 +299,7 @@ class Avg(Analytic):
     """
     Average operator
     """
+
     op = AVG
     type_to_check = Number
     return_type = Number
@@ -233,6 +310,7 @@ class Median(Analytic):
     """
     Median operator
     """
+
     op = MEDIAN
     type_to_check = Number
     return_type = Number
@@ -243,6 +321,7 @@ class PopulationStandardDeviation(Analytic):
     """
     Population deviation operator
     """
+
     op = STDDEV_POP
     type_to_check = Number
     return_type = Number
@@ -253,6 +332,7 @@ class SampleStandardDeviation(Analytic):
     """
     Sample standard deviation operator.
     """
+
     op = STDDEV_SAMP
     type_to_check = Number
     return_type = Number
@@ -263,6 +343,7 @@ class PopulationVariance(Analytic):
     """
     Variance operator
     """
+
     op = VAR_POP
     type_to_check = Number
     return_type = Number
@@ -273,6 +354,7 @@ class SampleVariance(Analytic):
     """
     Sample variance operator
     """
+
     op = VAR_SAMP
     type_to_check = Number
     return_type = Number
@@ -283,6 +365,7 @@ class FirstValue(Analytic):
     """
     First value operator
     """
+
     op = FIRST_VALUE
     sql_op = "FIRST"
 
@@ -291,6 +374,7 @@ class LastValue(Analytic):
     """
     Last value operator
     """
+
     op = LAST_VALUE
     sql_op = "LAST"
 
@@ -299,6 +383,7 @@ class Lag(Analytic):
     """
     Lag operator
     """
+
     op = LAG
     sql_op = "LAG"
 
@@ -307,6 +392,7 @@ class Lead(Analytic):
     """
     Lead operator
     """
+
     op = LEAD
     sql_op = "LEAD"
 
@@ -315,6 +401,7 @@ class Rank(Analytic):
     """
     Rank operator
     """
+
     op = RANK
     sql_op = "RANK"
     return_type = Integer
@@ -324,6 +411,7 @@ class RatioToReport(Analytic):
     """
     Ratio operator
     """
+
     op = RATIO_TO_REPORT
     type_to_check = Number
     return_type = Number
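The reworked analyticfunc keeps the same strategy as before: render a SQL window clause from the VTL partitioning, ordering, and windowing arguments, then let DuckDB execute it directly over the pandas DataFrame df. A minimal standalone sketch of that pattern, with illustrative column names and data rather than vtlengine's API:

# Illustrative sketch, not vtlengine's code: push a windowed analytic over a
# pandas DataFrame down to DuckDB, the query-building pattern analyticfunc uses.
import duckdb
import pandas as pd

df = pd.DataFrame({"Id_1": ["A", "A", "B", "B"], "Me_1": [1, 2, 3, 4]})

# DuckDB's replacement scan resolves "df" to the DataFrame in scope, so the
# frame can be referenced by its Python variable name inside the SQL text.
query = (
    "SELECT Id_1, "
    "SUM(Me_1) OVER (PARTITION BY Id_1 ORDER BY Me_1 "
    "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Me_1 "
    "FROM df"
)
print(duckdb.query(query).to_df())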
vtlengine/Operators/Assignment.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import Union
+from typing import Union, Any
 
 from vtlengine.Operators import Binary
 
@@ -11,12 +11,15 @@ ALL_MODEL_TYPES = Union[DataComponent, Dataset]
 class Assignment(Binary):
 
     @classmethod
-    def validate(cls, left_operand:
-        if
+    def validate(cls, left_operand: Any, right_operand: Any) -> ALL_MODEL_TYPES:
+        if (
+            isinstance(right_operand, DataComponent)
+            and right_operand.role.__str__() == "IDENTIFIER"
+        ):
             raise SemanticError("1-1-6-13", op=cls.op, comp_name=right_operand.name)
         right_operand.name = left_operand
         return right_operand
 
     @classmethod
-    def evaluate(cls, left_operand:
+    def evaluate(cls, left_operand: Any, right_operand: Any) -> ALL_MODEL_TYPES:
         return cls.validate(left_operand, right_operand)
vtlengine/Operators/Boolean.py
CHANGED
@@ -1,11 +1,8 @@
-if os.environ.get("SPARK", False):
-    import pyspark.pandas as pd
-else:
-    import pandas as pd
+# if os.environ.get("SPARK", False):
+#     import pyspark.pandas as pd
+# else:
+#     import pandas as pd
+import pandas as pd
 
 from typing import Optional, Any
 
@@ -22,22 +19,20 @@ class Unary(Operator.Unary):
 class Binary(Operator.Binary):
     type_to_check = Boolean
     return_type = Boolean
-    comp_op = None
+    comp_op: Any = None
 
     @classmethod
-    def apply_operation_series_scalar(cls, series:
-                                      series_left: bool) -> Any:
+    def apply_operation_series_scalar(cls, series: Any, scalar: Any, series_left: bool) -> Any:
         if series_left:
             return series.map(lambda x: cls.py_op(x, scalar))
         else:
             return series.map(lambda x: cls.py_op(scalar, x))
 
     @classmethod
-    def apply_operation_two_series(cls,
-                                   right_series.astype('bool[pyarrow]'))
+    def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
+        result = cls.comp_op(
+            left_series.astype("boolean"), right_series.astype("boolean")
+        )
         return result.replace({pd.NA: None}).astype(object)
 
     @classmethod
@@ -50,7 +45,7 @@ class And(Binary):
     comp_op = pd.Series.__and__
 
     @staticmethod
-    @numba.njit
+    # @numba.njit
     def py_op(x: Optional[bool], y: Optional[bool]) -> Optional[bool]:
         if (x is None and y == False) or (x == False and y is None):
             return False
@@ -58,9 +53,9 @@ class And(Binary):
             return None
         return x and y
 
-    @classmethod
-    def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
-        return x & y
+    # @classmethod
+    # def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
+    #     return x & y
 
 
 class Or(Binary):
@@ -68,7 +63,7 @@ class Or(Binary):
     comp_op = pd.Series.__or__
 
     @staticmethod
-    @numba.njit
+    # @numba.njit
     def py_op(x: Optional[bool], y: Optional[bool]) -> Optional[bool]:
         if (x is None and y == True) or (x == True and y is None):
             return True
@@ -76,9 +71,9 @@ class Or(Binary):
             return None
         return x or y
 
-    @classmethod
-    def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
-        return x | y
+    # @classmethod
+    # def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
+    #     return x | y
 
 
 class Xor(Binary):
@@ -91,23 +86,23 @@ class Xor(Binary):
             return None
         return (x and not y) or (not x and y)
 
-    @classmethod
-    def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
-        return x ^ y
+    # @classmethod
+    # def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
+    #     return x ^ y
 
 
 class Not(Unary):
     op = NOT
 
     @staticmethod
-    @numba.njit
+    # @numba.njit
     def py_op(x: Optional[bool]) -> Optional[bool]:
         return None if x is None else not x
 
-    @classmethod
-    def spark_op(cls, series: pd.Series) -> pd.Series:
-        return ~series
+    # @classmethod
+    # def spark_op(cls, series: pd.Series) -> pd.Series:
+    #     return ~series
 
     @classmethod
     def apply_operation_component(cls, series: Any) -> Any:
-        return series.map(lambda x: not x, na_action=
+        return series.map(lambda x: not x, na_action="ignore")