vtlengine-1.0.3rc3-py3-none-any.whl → vtlengine-1.1rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of vtlengine might be problematic.

Files changed (48)
  1. vtlengine/API/_InternalApi.py +64 -58
  2. vtlengine/API/__init__.py +11 -2
  3. vtlengine/API/data/schema/json_schema_2.1.json +116 -0
  4. vtlengine/AST/ASTConstructor.py +5 -4
  5. vtlengine/AST/ASTConstructorModules/Expr.py +47 -48
  6. vtlengine/AST/ASTConstructorModules/ExprComponents.py +45 -23
  7. vtlengine/AST/ASTConstructorModules/Terminals.py +21 -11
  8. vtlengine/AST/ASTEncoders.py +1 -1
  9. vtlengine/AST/DAG/__init__.py +0 -3
  10. vtlengine/AST/Grammar/lexer.py +0 -1
  11. vtlengine/AST/Grammar/parser.py +185 -440
  12. vtlengine/AST/VtlVisitor.py +0 -1
  13. vtlengine/DataTypes/TimeHandling.py +50 -15
  14. vtlengine/DataTypes/__init__.py +79 -7
  15. vtlengine/Exceptions/__init__.py +3 -5
  16. vtlengine/Exceptions/messages.py +65 -105
  17. vtlengine/Interpreter/__init__.py +83 -38
  18. vtlengine/Model/__init__.py +7 -9
  19. vtlengine/Operators/Aggregation.py +13 -7
  20. vtlengine/Operators/Analytic.py +48 -9
  21. vtlengine/Operators/Assignment.py +0 -1
  22. vtlengine/Operators/CastOperator.py +44 -44
  23. vtlengine/Operators/Clause.py +16 -10
  24. vtlengine/Operators/Comparison.py +20 -12
  25. vtlengine/Operators/Conditional.py +30 -13
  26. vtlengine/Operators/General.py +9 -4
  27. vtlengine/Operators/HROperators.py +4 -14
  28. vtlengine/Operators/Join.py +15 -14
  29. vtlengine/Operators/Numeric.py +32 -26
  30. vtlengine/Operators/RoleSetter.py +6 -2
  31. vtlengine/Operators/Set.py +12 -8
  32. vtlengine/Operators/String.py +9 -9
  33. vtlengine/Operators/Time.py +136 -116
  34. vtlengine/Operators/Validation.py +10 -4
  35. vtlengine/Operators/__init__.py +56 -69
  36. vtlengine/Utils/__init__.py +6 -1
  37. vtlengine/__extras_check.py +17 -0
  38. vtlengine/files/output/__init__.py +2 -1
  39. vtlengine/files/output/_time_period_representation.py +2 -1
  40. vtlengine/files/parser/__init__.py +47 -31
  41. vtlengine/files/parser/_rfc_dialect.py +1 -1
  42. vtlengine/files/parser/_time_checking.py +4 -4
  43. {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1rc1.dist-info}/METADATA +17 -17
  44. vtlengine-1.1rc1.dist-info/RECORD +59 -0
  45. {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1rc1.dist-info}/WHEEL +1 -1
  46. vtlengine/DataTypes/NumericTypesHandling.py +0 -38
  47. vtlengine-1.0.3rc3.dist-info/RECORD +0 -58
  48. {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1rc1.dist-info}/LICENSE.md +0 -0

vtlengine/Operators/__init__.py

@@ -8,7 +8,20 @@ from typing import Any, Optional, Union
  # import pandas as pd
  import pandas as pd
 
- from vtlengine.AST.Grammar.tokens import AND, CEIL, EQ, FLOOR, GT, GTE, LT, LTE, NEQ, OR, ROUND, XOR
+ from vtlengine.AST.Grammar.tokens import (
+     AND,
+     CEIL,
+     EQ,
+     FLOOR,
+     GT,
+     GTE,
+     LT,
+     LTE,
+     NEQ,
+     OR,
+     ROUND,
+     XOR,
+ )
  from vtlengine.DataTypes import (
      COMP_NAME_MAPPING,
      SCALAR_TYPES_CLASS_REVERSE,
@@ -18,7 +31,7 @@ from vtlengine.DataTypes import (
      unary_implicit_promotion,
  )
  from vtlengine.DataTypes.TimeHandling import (
-     DURATION_MAPPING,
+     PERIOD_IND_MAPPING,
      TimeIntervalHandler,
      TimePeriodHandler,
  )
@@ -54,7 +67,6 @@ class Operator:
 
      @classmethod
      def cast_time_types(cls, data_type: Any, series: Any) -> Any:
-
          if cls.op not in BINARY_COMPARISON_OPERATORS:
              return series
          if data_type.__name__ == "TimeInterval":
@@ -64,7 +76,7 @@
          elif data_type.__name__ == "TimePeriod":
              series = series.map(lambda x: TimePeriodHandler(x), na_action="ignore")
          elif data_type.__name__ == "Duration":
-             series = series.map(lambda x: DURATION_MAPPING[x], na_action="ignore")
+             series = series.map(lambda x: PERIOD_IND_MAPPING[x], na_action="ignore")
          return series
 
      @classmethod
@@ -76,9 +88,9 @@
          elif data_type.__name__ == "TimePeriod":
              return TimePeriodHandler(value)
          elif data_type.__name__ == "Duration":
-             if value not in DURATION_MAPPING:
+             if value not in PERIOD_IND_MAPPING:
                  raise Exception(f"Duration {value} is not valid")
-             return DURATION_MAPPING[value]
+             return PERIOD_IND_MAPPING[value]
          return value
 
      @classmethod
@@ -165,11 +177,11 @@
 
 
  def _id_type_promotion_join_keys(
-         c_left: Component,
-         c_right: Component,
-         join_key: str,
-         left_data: Optional[pd.DataFrame] = None,
-         right_data: Optional[pd.DataFrame] = None,
+     c_left: Component,
+     c_right: Component,
+     join_key: str,
+     left_data: Optional[pd.DataFrame] = None,
+     right_data: Optional[pd.DataFrame] = None,
  ) -> None:
      if left_data is None:
          left_data = pd.DataFrame()
@@ -184,7 +196,7 @@ def _id_type_promotion_join_keys(
          right_data[join_key] = right_data[join_key].astype(object)
          return
      if (left_type_name == "Integer" and right_type_name == "Number") or (
-             left_type_name == "Number" and right_type_name == "Integer"
+         left_type_name == "Number" and right_type_name == "Integer"
      ):
          left_data[join_key] = left_data[join_key].map(lambda x: int(float(x)))
          right_data[join_key] = right_data[join_key].map(lambda x: int(float(x)))
@@ -209,7 +221,6 @@ def _handle_str_number(x: Union[str, int, float]) -> Union[str, int, float]:
 
 
  class Binary(Operator):
-
      @classmethod
      def op_func(cls, *args: Any) -> Any:
          x, y = args
@@ -220,7 +231,6 @@ class Binary(Operator):
 
      @classmethod
      def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
-
          if os.getenv("SPARK", False):
              if cls.spark_op is None:
                  cls.spark_op = cls.py_op
@@ -234,12 +244,11 @@ class Binary(Operator):
 
      @classmethod
      def apply_operation_series_scalar(
-             cls,
-             series: Any,
-             scalar: Scalar,
-             series_left: bool,
+         cls,
+         series: Any,
+         scalar: Scalar,
+         series_left: bool,
      ) -> Any:
-
          if scalar is None:
              return pd.Series(None, index=series.index)
          if series_left:
@@ -280,7 +289,6 @@ class Binary(Operator):
 
      @classmethod
      def dataset_validation(cls, left_operand: Dataset, right_operand: Dataset) -> Dataset:
-
          left_identifiers = left_operand.get_identifiers_names()
          right_identifiers = right_operand.get_identifiers_names()
 
@@ -293,7 +301,10 @@ class Binary(Operator):
 
          if left_measures_names != right_measures_names:
              raise SemanticError(
-                 "1-1-14-1", op=cls.op, left=left_measures_names, right=right_measures_names
+                 "1-1-14-1",
+                 op=cls.op,
+                 left=left_measures_names,
+                 right=right_measures_names,
              )
          elif len(left_measures) == 0:
              raise SemanticError("1-1-1-8", op=cls.op, name=left_operand.name)
@@ -331,7 +342,6 @@ class Binary(Operator):
 
      @classmethod
      def dataset_scalar_validation(cls, dataset: Dataset, scalar: Scalar) -> Dataset:
-
          if len(dataset.get_measures()) == 0:
              raise SemanticError("1-1-1-8", op=cls.op, name=dataset.name)
 
@@ -346,11 +356,12 @@ class Binary(Operator):
 
      @classmethod
      def scalar_validation(cls, left_operand: Scalar, right_operand: Scalar) -> Scalar:
-
          if not cls.validate_type_compatibility(left_operand.data_type, right_operand.data_type):
              raise SemanticError(
-                 "1-1-1-2", type_1=left_operand.data_type, type_2=right_operand.data_type,
-                 type_check=cls.type_to_check
+                 "1-1-1-2",
+                 type_1=left_operand.data_type,
+                 type_2=right_operand.data_type,
+                 type_check=cls.type_to_check,
              )
          return Scalar(
              name="result",
@@ -360,7 +371,7 @@ class Binary(Operator):
 
      @classmethod
      def component_validation(
-             cls, left_operand: DataComponent, right_operand: DataComponent
+         cls, left_operand: DataComponent, right_operand: DataComponent
      ) -> DataComponent:
          """
          Validates the compatibility between the types of the components and the operator
@@ -382,7 +393,6 @@ class Binary(Operator):
 
      @classmethod
      def component_scalar_validation(cls, component: DataComponent, scalar: Scalar) -> DataComponent:
-
          cls.type_validation(component.data_type, scalar.data_type)
          result = DataComponent(
              name=component.name,
@@ -395,7 +405,6 @@ class Binary(Operator):
 
      @classmethod
      def dataset_set_validation(cls, dataset: Dataset, scalar_set: ScalarSet) -> Dataset:
-
          if len(dataset.get_measures()) == 0:
              raise SemanticError("1-1-1-8", op=cls.op, name=dataset.name)
          for measure in dataset.get_measures():
@@ -412,9 +421,8 @@ class Binary(Operator):
 
      @classmethod
      def component_set_validation(
-             cls, component: DataComponent, scalar_set: ScalarSet
+         cls, component: DataComponent, scalar_set: ScalarSet
      ) -> DataComponent:
-
          cls.type_validation(component.data_type, scalar_set.data_type)
          result = DataComponent(
              name="result",
@@ -427,7 +435,6 @@ class Binary(Operator):
 
      @classmethod
      def scalar_set_validation(cls, scalar: Scalar, scalar_set: ScalarSet) -> Scalar:
-
          cls.type_validation(scalar.data_type, scalar_set.data_type)
          return Scalar(
              name="result",
@@ -468,7 +475,7 @@ class Binary(Operator):
 
      @classmethod
      def apply_return_type_dataset(
-             cls, result_dataset: Dataset, left_operand: Any, right_operand: Any
+         cls, result_dataset: Dataset, left_operand: Any, right_operand: Any
      ) -> None:
          """
          Used in dataset's validation.
@@ -498,9 +505,9 @@ class Binary(Operator):
              if result_dataset.data is not None:
                  result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
          elif (
-                 changed_allowed is False
-                 and is_mono_measure is False
-                 and left_type.promotion_changed_type(result_data_type)
+             changed_allowed is False
+             and is_mono_measure is False
+             and left_type.promotion_changed_type(result_data_type)
          ):
              raise SemanticError("1-1-1-4", op=cls.op)
          else:
@@ -508,7 +515,6 @@ class Binary(Operator):
 
      @classmethod
      def dataset_evaluation(cls, left_operand: Dataset, right_operand: Dataset) -> Dataset:
-
          result_dataset = cls.dataset_validation(left_operand, right_operand)
 
          use_right_as_base = False
@@ -587,16 +593,14 @@ class Binary(Operator):
 
      @classmethod
      def scalar_evaluation(cls, left_operand: Scalar, right_operand: Scalar) -> Scalar:
-
          result_scalar = cls.scalar_validation(left_operand, right_operand)
          result_scalar.value = cls.op_func(left_operand.value, right_operand.value)
          return result_scalar
 
      @classmethod
      def dataset_scalar_evaluation(
-             cls, dataset: Dataset, scalar: Scalar, dataset_left: bool = True
+         cls, dataset: Dataset, scalar: Scalar, dataset_left: bool = True
      ) -> Dataset:
-
          result_dataset = cls.dataset_scalar_validation(dataset, scalar)
          result_data = dataset.data.copy() if dataset.data is not None else pd.DataFrame()
          result_dataset.data = result_data
@@ -606,9 +610,9 @@ class Binary(Operator):
          for measure in dataset.get_measures():
              measure_data = cls.cast_time_types(measure.data_type, result_data[measure.name].copy())
              if measure.data_type.__name__.__str__() == "Duration" and not isinstance(
-                     scalar_value, int
+                 scalar_value, int
              ):
-                 scalar_value = DURATION_MAPPING[scalar_value]
+                 scalar_value = PERIOD_IND_MAPPING[scalar_value]
              result_dataset.data[measure.name] = cls.apply_operation_series_scalar(
                  measure_data, scalar_value, dataset_left
              )
@@ -621,9 +625,8 @@ class Binary(Operator):
 
      @classmethod
      def component_evaluation(
-             cls, left_operand: DataComponent, right_operand: DataComponent
+         cls, left_operand: DataComponent, right_operand: DataComponent
      ) -> DataComponent:
-
          result_component = cls.component_validation(left_operand, right_operand)
          left_data = cls.cast_time_types(
              left_operand.data_type,
@@ -631,16 +634,15 @@ class Binary(Operator):
          )
          right_data = cls.cast_time_types(
              right_operand.data_type,
-             right_operand.data.copy() if right_operand.data is not None else pd.Series(),
+             (right_operand.data.copy() if right_operand.data is not None else pd.Series()),
          )
          result_component.data = cls.apply_operation_two_series(left_data, right_data)
          return result_component
 
      @classmethod
      def component_scalar_evaluation(
-             cls, component: DataComponent, scalar: Scalar, component_left: bool = True
+         cls, component: DataComponent, scalar: Scalar, component_left: bool = True
      ) -> DataComponent:
-
          result_component = cls.component_scalar_validation(component, scalar)
          comp_data = cls.cast_time_types(
              component.data_type,
@@ -648,9 +650,9 @@ class Binary(Operator):
          )
          scalar_value = cls.cast_time_types_scalar(scalar.data_type, scalar.value)
          if component.data_type.__name__.__str__() == "Duration" and not isinstance(
-                 scalar_value, int
+             scalar_value, int
          ):
-             scalar_value = DURATION_MAPPING[scalar_value]
+             scalar_value = PERIOD_IND_MAPPING[scalar_value]
          result_component.data = cls.apply_operation_series_scalar(
              comp_data, scalar_value, component_left
          )
@@ -658,7 +660,6 @@ class Binary(Operator):
 
      @classmethod
      def dataset_set_evaluation(cls, dataset: Dataset, scalar_set: ScalarSet) -> Dataset:
-
          result_dataset = cls.dataset_set_validation(dataset, scalar_set)
          result_data = dataset.data.copy() if dataset.data is not None else pd.DataFrame()
 
@@ -676,18 +677,17 @@ class Binary(Operator):
 
      @classmethod
      def component_set_evaluation(
-             cls, component: DataComponent, scalar_set: ScalarSet
+         cls, component: DataComponent, scalar_set: ScalarSet
      ) -> DataComponent:
-
          result_component = cls.component_set_validation(component, scalar_set)
          result_component.data = cls.apply_operation_two_series(
-             component.data.copy() if component.data is not None else pd.Series(), scalar_set
+             component.data.copy() if component.data is not None else pd.Series(),
+             scalar_set,
          )
          return result_component
 
      @classmethod
      def scalar_set_evaluation(cls, scalar: Scalar, scalar_set: ScalarSet) -> Scalar:
-
          result_scalar = cls.scalar_set_validation(scalar, scalar_set)
          result_scalar.value = cls.op_func(scalar.value, scalar_set)
          return result_scalar
@@ -726,7 +726,6 @@ class Binary(Operator):
 
 
  class Unary(Operator):
-
      @classmethod
      def op_func(cls, *args: Any) -> Any:
          x = args[0]
@@ -758,7 +757,6 @@ class Unary(Operator):
 
      @classmethod
      def dataset_validation(cls, operand: Dataset) -> Dataset:
-
          cls.validate_dataset_type(operand)
          if len(operand.get_measures()) == 0:
              raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
@@ -774,14 +772,12 @@ class Unary(Operator):
 
      @classmethod
      def scalar_validation(cls, operand: Scalar) -> Scalar:
-
          result_type = cls.type_validation(operand.data_type)
          result = Scalar(name="result", data_type=result_type, value=None)
          return result
 
      @classmethod
      def component_validation(cls, operand: DataComponent) -> DataComponent:
-
          result_type = cls.type_validation(operand.data_type)
          result = DataComponent(
              name="result",
@@ -795,18 +791,15 @@ class Unary(Operator):
      # The following class method implements the type promotion
      @classmethod
      def type_validation(cls, operand: Any) -> Any:
-
          return unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
 
      # The following class method checks the type promotion
      @classmethod
      def validate_type_compatibility(cls, operand: Any) -> bool:
-
          return check_unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
 
      @classmethod
      def validate_dataset_type(cls, dataset: Dataset) -> None:
-
          if cls.type_to_check is not None:
              for measure in dataset.get_measures():
                  if not cls.validate_type_compatibility(measure.data_type):
@@ -820,7 +813,6 @@ class Unary(Operator):
 
      @classmethod
      def validate_scalar_type(cls, scalar: Scalar) -> None:
-
          if cls.type_to_check is not None and not cls.validate_type_compatibility(scalar.data_type):
              raise SemanticError(
                  "1-1-1-5",
@@ -831,7 +823,6 @@ class Unary(Operator):
 
      @classmethod
      def apply_return_type_dataset(cls, result_dataset: Dataset, operand: Dataset) -> None:
-
          changed_allowed = cls.op in MONOMEASURE_CHANGED_ALLOWED
          is_mono_measure = len(operand.get_measures()) == 1
          for measure in result_dataset.get_measures():
@@ -850,9 +841,9 @@ class Unary(Operator):
              if result_dataset.data is not None:
                  result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
          elif (
-                 changed_allowed is False
-                 and is_mono_measure is False
-                 and operand_type.promotion_changed_type(result_data_type)
+             changed_allowed is False
+             and is_mono_measure is False
+             and operand_type.promotion_changed_type(result_data_type)
          ):
              raise SemanticError("1-1-1-4", op=cls.op)
          else:
@@ -860,7 +851,6 @@ class Unary(Operator):
 
      @classmethod
      def evaluate(cls, operand: ALL_MODEL_DATA_TYPES) -> Any:
-
          if isinstance(operand, Dataset):
              return cls.dataset_evaluation(operand)
          if isinstance(operand, Scalar):
@@ -870,7 +860,6 @@ class Unary(Operator):
 
      @classmethod
      def dataset_evaluation(cls, operand: Dataset) -> Dataset:
-
          result_dataset = cls.dataset_validation(operand)
          result_data = operand.data.copy() if operand.data is not None else pd.DataFrame()
          for measure_name in operand.get_measures_names():
@@ -885,14 +874,12 @@ class Unary(Operator):
 
      @classmethod
      def scalar_evaluation(cls, operand: Scalar) -> Scalar:
-
          result_scalar = cls.scalar_validation(operand)
          result_scalar.value = cls.op_func(operand.value)
          return result_scalar
 
      @classmethod
      def component_evaluation(cls, operand: DataComponent) -> DataComponent:
-
          result_component = cls.component_validation(operand)
          result_component.data = cls.apply_operation_component(
              operand.data.copy() if operand.data is not None else pd.Series()

vtlengine/Utils/__init__.py

@@ -339,7 +339,12 @@ REGULAR_AGGREGATION_MAPPING = {
      APPLY: Apply,
  }
 
- SET_MAPPING = {UNION: Union, INTERSECT: Intersection, SYMDIFF: Symdiff, SETDIFF: Setdiff}
+ SET_MAPPING = {
+     UNION: Union,
+     INTERSECT: Intersection,
+     SYMDIFF: Symdiff,
+     SETDIFF: Setdiff,
+ }
 
  AGGREGATION_MAPPING = {
      MAX: Max,

vtlengine/__extras_check.py (new file)

@@ -0,0 +1,17 @@
+ import importlib.util
+
+ EXTRAS_DOCS = "https://docs.vtlengine.meaningfuldata.eu/#installation"
+ ERROR_MESSAGE = (
+     "The '{extra_name}' extra is required to run {extra_desc}. "
+     "Please install it using 'pip install vtlengine[{extra_name}]' or "
+     "install all extras with 'pip install vtlengine[all]'. "
+     f"Check the documentation at: {EXTRAS_DOCS}"
+ )
+
+
+ def __check_s3_extra() -> None:
+     package_loc = importlib.util.find_spec("s3fs")
+     if package_loc is None:
+         raise ImportError(
+             ERROR_MESSAGE.format(extra_name="s3", extra_desc="over csv files using S3 URIs")
+         ) from None
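
The new module is a lazy guard for optional dependencies: nothing is checked at import time, and the "s3" extra is only verified when an S3-style path is actually used, as the hunks to vtlengine/files/output/__init__.py just below show. A minimal sketch of the calling pattern, assuming only what the diff shows (the wrapper function here is hypothetical):

    # Hypothetical caller illustrating the extras-guard pattern;
    # __check_s3_extra itself comes from the new module above.
    from vtlengine.__extras_check import __check_s3_extra

    def write_csv_anywhere(uri: str) -> None:
        # Raises ImportError with install instructions if s3fs is missing;
        # callers passing local Path objects never reach this check.
        __check_s3_extra()
        ...  # proceed with pandas / s3fs I/O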

vtlengine/files/output/__init__.py

@@ -3,6 +3,7 @@ from typing import Optional, Union
 
  import pandas as pd
 
+ from vtlengine.__extras_check import __check_s3_extra
  from vtlengine.files.output._time_period_representation import (
      TimePeriodRepresentation,
      format_time_period_external_representation,
@@ -15,12 +16,12 @@ def save_datapoints(
      dataset: Dataset,
      output_path: Union[str, Path],
  ) -> None:
-
      if dataset.data is None:
          dataset.data = pd.DataFrame()
      if time_period_representation is not None:
          format_time_period_external_representation(dataset, time_period_representation)
      if isinstance(output_path, str):
+         __check_s3_extra()
          if output_path.endswith("/"):
              s3_file_output = output_path + f"{dataset.name}.csv"
          else:

vtlengine/files/output/_time_period_representation.py

@@ -1,4 +1,5 @@
  from enum import Enum
+ from typing import Union
 
  from vtlengine.DataTypes import TimePeriod
  from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
@@ -23,7 +24,7 @@ def _format_vtl_representation(value: str) -> str:
 
 
  def format_time_period_external_representation(
-     dataset: Dataset | Scalar, mode: TimePeriodRepresentation
+     dataset: Union[Dataset, Scalar], mode: TimePeriodRepresentation
  ) -> None:
      """
      From SDMX time period representation to standard VTL representation (no hyphen).

vtlengine/files/parser/__init__.py

@@ -17,10 +17,14 @@ from vtlengine.DataTypes import (
      TimeInterval,
      TimePeriod,
  )
- from vtlengine.DataTypes.TimeHandling import DURATION_MAPPING
+ from vtlengine.DataTypes.TimeHandling import PERIOD_IND_MAPPING
  from vtlengine.Exceptions import InputValidationException, SemanticError
  from vtlengine.files.parser._rfc_dialect import register_rfc
- from vtlengine.files.parser._time_checking import check_date, check_time, check_time_period
+ from vtlengine.files.parser._time_checking import (
+     check_date,
+     check_time,
+     check_time_period,
+ )
  from vtlengine.Model import Component, Dataset, Role
 
  TIME_CHECKS_MAPPING: Dict[Type[ScalarType], Any] = {
@@ -73,8 +77,11 @@ def _sanitize_pandas_columns(
      components: Dict[str, Component], csv_path: Union[str, Path], data: pd.DataFrame
  ) -> pd.DataFrame:
      # Fast loading from SDMX-CSV
-     if ("DATAFLOW" in data.columns and data.columns[0] == "DATAFLOW" and
-             "DATAFLOW" not in components):
+     if (
+         "DATAFLOW" in data.columns
+         and data.columns[0] == "DATAFLOW"
+         and "DATAFLOW" not in components
+     ):
          data.drop(columns=["DATAFLOW"], inplace=True)
      if "STRUCTURE" in data.columns and data.columns[0] == "STRUCTURE":
          if "STRUCTURE" not in components:
@@ -102,32 +109,23 @@
      return data
 
 
- def _pandas_load_csv(components: Dict[str, Component], csv_path: Path) -> pd.DataFrame:
+ def _pandas_load_csv(components: Dict[str, Component], csv_path: Union[str, Path]) -> pd.DataFrame:
      obj_dtypes = {comp_name: np.object_ for comp_name, comp in components.items()}
 
      try:
          data = pd.read_csv(
-             csv_path, dtype=obj_dtypes, engine="c", keep_default_na=False, na_values=[""]
+             csv_path,
+             dtype=obj_dtypes,
+             engine="c",
+             keep_default_na=False,
+             na_values=[""],
          )
      except UnicodeDecodeError:
-         raise InputValidationException(code="0-1-2-5", file=csv_path.name)
-
-     return _sanitize_pandas_columns(components, csv_path, data)
-
-
- def _pandas_load_s3_csv(components: Dict[str, Component], csv_path: str) -> pd.DataFrame:
-     obj_dtypes = {comp_name: np.object_ for comp_name, comp in components.items()}
-
-     # start = time()
-     try:
-         data = pd.read_csv(
-             csv_path, dtype=obj_dtypes, engine="c", keep_default_na=False, na_values=[""]
-         )
+         if isinstance(csv_path, Path):
+             raise InputValidationException(code="0-1-2-5", file=csv_path.name)
+         else:
+             raise InputValidationException(code="0-1-2-5", file=csv_path)
 
-     except UnicodeDecodeError:
-         raise InputValidationException(code="0-1-2-5", file=csv_path)
-     except Exception as e:
-         raise InputValidationException(f"ERROR: {str(e)}, review file {str(csv_path)}")
      return _sanitize_pandas_columns(components, csv_path, data)
 
 
@@ -165,7 +163,6 @@ def _validate_pandas(
      comp_name = ""
      comp = None
      try:
-
          for comp_name, comp in components.items():
              if comp.data_type in (Date, TimePeriod, TimeInterval):
                  data[comp_name] = data[comp_name].map(
@@ -184,16 +181,34 @@ def _validate_pandas(
              elif comp.data_type == Duration:
                  values_correct = (
                      data[comp_name]
-                     .map(lambda x: x.replace(" ", "") in DURATION_MAPPING, na_action="ignore")
+                     .map(
+                         lambda x: Duration.validate_duration(x),
+                         na_action="ignore",
+                     )
                      .all()
                  )
                  if not values_correct:
-                     raise ValueError(f"Duration values are not correct in column {comp_name}")
+                     try:
+                         values_correct = (
+                             data[comp_name]
+                             .map(
+                                 lambda x: x.replace(" ", "") in PERIOD_IND_MAPPING,
+                                 na_action="ignore",
+                             )
+                             .all()
+                         )
+                         if not values_correct:
+                             raise ValueError(
+                                 f"Duration values are not correct in column {comp_name}"
+                             )
+                     except ValueError:
+                         raise ValueError(f"Duration values are not correct in column {comp_name}")
              else:
                  data[comp_name] = data[comp_name].map(
                      lambda x: str(x).replace('"', ""), na_action="ignore"
                  )
                  data[comp_name] = data[comp_name].astype(np.object_, errors="raise")
+
      except ValueError:
          str_comp = SCALAR_TYPES_CLASS_REVERSE[comp.data_type] if comp else "Null"
          raise SemanticError("0-1-1-12", name=dataset_name, column=comp_name, type=str_comp)
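
Read per value, the new Duration check accepts two spellings: a value passes if Duration.validate_duration accepts it, and the column is otherwise re-checked against the period indicators in PERIOD_IND_MAPPING. A condensed per-value paraphrase (hypothetical helper; the real check above is column-wide, so a column mixing both spellings can still fail):

    # Hypothetical per-value condensation of the column-wide Duration check above.
    def _duration_value_ok(x: str) -> bool:
        try:
            if Duration.validate_duration(x):  # preferred representation
                return True
        except ValueError:
            pass
        # Fallback: bare period-indicator spelling, i.e. a key of PERIOD_IND_MAPPING.
        return x.replace(" ", "") in PERIOD_IND_MAPPING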
@@ -202,14 +217,15 @@ def _validate_pandas(
 
 
  def load_datapoints(
-     components: Dict[str, Component], dataset_name: str, csv_path: Optional[Union[Path, str]] = None
+     components: Dict[str, Component],
+     dataset_name: str,
+     csv_path: Optional[Union[Path, str]] = None,
  ) -> pd.DataFrame:
      if csv_path is None or (isinstance(csv_path, Path) and not csv_path.exists()):
          return pd.DataFrame(columns=list(components.keys()))
-     elif isinstance(csv_path, str):
-         data = _pandas_load_s3_csv(components, csv_path)
-     elif isinstance(csv_path, Path):
-         _validate_csv_path(components, csv_path)
+     elif isinstance(csv_path, (str, Path)):
+         if isinstance(csv_path, Path):
+             _validate_csv_path(components, csv_path)
          data = _pandas_load_csv(components, csv_path)
      else:
          raise Exception("Invalid csv_path type")
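
Net effect of the loader changes: _pandas_load_s3_csv is gone, and a single _pandas_load_csv handles both local Path objects and URI strings (pandas hands s3:// URLs to s3fs when it is installed). A usage sketch of the consolidated entry point, with illustrative dataset and path values (not taken from the diff):

    from pathlib import Path

    # Local file: the csv path is validated first, then read with the C engine.
    df_local = load_datapoints(components, "DS_1", Path("data/DS_1.csv"))

    # S3 URI string: skips the filesystem validation and relies on pandas + s3fs.
    df_remote = load_datapoints(components, "DS_1", "s3://my-bucket/DS_1.csv")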

vtlengine/files/parser/_rfc_dialect.py

@@ -19,4 +19,4 @@ class RFCDialect(csv.Dialect):
 
  def register_rfc() -> None:
      """Register the RFC dialect."""
-     csv.register_dialect("rfc", RFCDialect)
+     csv.register_dialect("rfc", RFCDialect)  # type: ignore[arg-type]

vtlengine/files/parser/_time_checking.py

@@ -21,16 +21,16 @@ def check_date(value: str) -> str:
              raise InputValidationException(f"Date {value} is out of range for the month.")
          if "month must be in 1..12" in str(e):
              raise InputValidationException(
-                 f"Date {value} is invalid. " f"Month must be between 1 and 12."
+                 f"Date {value} is invalid. Month must be between 1 and 12."
              )
          raise InputValidationException(
-             f"Date {value} is not in the correct format. " f"Use YYYY-MM-DD."
+             f"Date {value} is not in the correct format. Use YYYY-MM-DD."
          )
 
      # Check date is between 1900 and 9999
      if not 1800 <= date_value.year <= 9999:
          raise InputValidationException(
-             f"Date {value} is invalid. " f"Year must be between 1900 and 9999."
+             f"Date {value} is invalid. Year must be between 1900 and 9999."
          )
 
      return date_value.isoformat()
@@ -68,7 +68,7 @@ def check_time(value: str) -> str:
              raise ValueError("Start date is greater than end date.")
          return value
      raise ValueError(
-         "Time is not in the correct format. " "Use YYYY-MM-DD/YYYY-MM-DD or YYYY or YYYY-MM."
+         "Time is not in the correct format. Use YYYY-MM-DD/YYYY-MM-DD or YYYY or YYYY-MM."
      )
 
 