vtlengine 1.0-py3-none-any.whl → 1.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vtlengine might be problematic.

Files changed (56)
  1. vtlengine/API/_InternalApi.py +159 -102
  2. vtlengine/API/__init__.py +110 -68
  3. vtlengine/AST/ASTConstructor.py +188 -98
  4. vtlengine/AST/ASTConstructorModules/Expr.py +402 -205
  5. vtlengine/AST/ASTConstructorModules/ExprComponents.py +248 -104
  6. vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
  7. vtlengine/AST/ASTEncoders.py +1 -1
  8. vtlengine/AST/ASTTemplate.py +24 -9
  9. vtlengine/AST/ASTVisitor.py +8 -12
  10. vtlengine/AST/DAG/__init__.py +43 -35
  11. vtlengine/AST/DAG/_words.py +4 -4
  12. vtlengine/AST/Grammar/Vtl.g4 +49 -20
  13. vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
  14. vtlengine/AST/Grammar/lexer.py +2012 -1312
  15. vtlengine/AST/Grammar/parser.py +7524 -4343
  16. vtlengine/AST/Grammar/tokens.py +140 -128
  17. vtlengine/AST/VtlVisitor.py +16 -5
  18. vtlengine/AST/__init__.py +41 -11
  19. vtlengine/DataTypes/NumericTypesHandling.py +5 -4
  20. vtlengine/DataTypes/TimeHandling.py +196 -301
  21. vtlengine/DataTypes/__init__.py +304 -218
  22. vtlengine/Exceptions/__init__.py +96 -27
  23. vtlengine/Exceptions/messages.py +149 -69
  24. vtlengine/Interpreter/__init__.py +817 -497
  25. vtlengine/Model/__init__.py +172 -121
  26. vtlengine/Operators/Aggregation.py +156 -95
  27. vtlengine/Operators/Analytic.py +167 -79
  28. vtlengine/Operators/Assignment.py +7 -4
  29. vtlengine/Operators/Boolean.py +27 -32
  30. vtlengine/Operators/CastOperator.py +177 -131
  31. vtlengine/Operators/Clause.py +137 -99
  32. vtlengine/Operators/Comparison.py +148 -117
  33. vtlengine/Operators/Conditional.py +290 -98
  34. vtlengine/Operators/General.py +68 -47
  35. vtlengine/Operators/HROperators.py +91 -72
  36. vtlengine/Operators/Join.py +217 -118
  37. vtlengine/Operators/Numeric.py +129 -46
  38. vtlengine/Operators/RoleSetter.py +16 -15
  39. vtlengine/Operators/Set.py +61 -36
  40. vtlengine/Operators/String.py +213 -139
  41. vtlengine/Operators/Time.py +467 -215
  42. vtlengine/Operators/Validation.py +117 -76
  43. vtlengine/Operators/__init__.py +340 -213
  44. vtlengine/Utils/__init__.py +232 -41
  45. vtlengine/__init__.py +1 -1
  46. vtlengine/files/output/__init__.py +15 -6
  47. vtlengine/files/output/_time_period_representation.py +10 -9
  48. vtlengine/files/parser/__init__.py +79 -52
  49. vtlengine/files/parser/_rfc_dialect.py +6 -5
  50. vtlengine/files/parser/_time_checking.py +48 -37
  51. vtlengine-1.0.2.dist-info/METADATA +245 -0
  52. vtlengine-1.0.2.dist-info/RECORD +58 -0
  53. {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/WHEEL +1 -1
  54. vtlengine-1.0.dist-info/METADATA +0 -104
  55. vtlengine-1.0.dist-info/RECORD +0 -58
  56. {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/LICENSE.md +0 -0
@@ -1,14 +1,16 @@
  import json
  import os
  from pathlib import Path
- from typing import Union, Optional, Dict, List
+ from typing import Union, Optional, Dict, List, Any
 
  import pandas as pd
- from s3fs import S3FileSystem
+ from s3fs import S3FileSystem  # type: ignore[import-untyped]
 
  from vtlengine.AST import PersistentAssignment, Start
  from vtlengine.DataTypes import SCALAR_TYPES
- from vtlengine.Model import ValueDomain, Dataset, Scalar, Component, Role, ExternalRoutine
+ from vtlengine.Exceptions import check_key
+ from vtlengine.Model import (ValueDomain, Dataset, Scalar, Component, Role,
+                              ExternalRoutine, Role_keys)
 
  from vtlengine.files.parser import _validate_pandas, _fill_dataset_empty_data
 
  base_path = Path(__file__).parent
@@ -21,92 +23,98 @@ filepath_out_json = base_path / "data" / "DataStructure" / "output"
  filepath_out_csv = base_path / "data" / "DataSet" / "output"
 
 
- def _load_dataset_from_structure(structures: dict):
+ def _load_dataset_from_structure(structures: Dict[str, Any]) -> Dict[str, Any]:
      """
      Loads a dataset with the structure given.
      """
      datasets = {}
 
-     if 'datasets' in structures:
-         for dataset_json in structures['datasets']:
-             dataset_name = dataset_json['name']
-             components = {
-                 component['name']: Component(name=component['name'],
-                                              data_type=SCALAR_TYPES[component['type']],
-                                              role=Role(component['role']),
-                                              nullable=component['nullable'])
-                 for component in dataset_json['DataStructure']}
-
-             datasets[dataset_name] = Dataset(name=dataset_name,
-                                              components=components,
-                                              data=None)
-     if 'scalars' in structures:
-         for scalar_json in structures['scalars']:
-             scalar_name = scalar_json['name']
-             scalar = Scalar(name=scalar_name,
-                             data_type=SCALAR_TYPES[scalar_json['type']],
-                             value=None)
-             datasets[scalar_name] = scalar
+     if "datasets" in structures:
+         for dataset_json in structures["datasets"]:
+             dataset_name = dataset_json["name"]
+             components = {}
+
+             for component in dataset_json["DataStructure"]:
+                 check_key("data_type", SCALAR_TYPES.keys(), component["type"])
+                 check_key("role", Role_keys, component["role"])
+                 components[component["name"]] = Component(
+                     name=component["name"],
+                     data_type=SCALAR_TYPES[component["type"]],
+                     role=Role(component["role"]),
+                     nullable=component["nullable"],
+                 )
+
+             datasets[dataset_name] = Dataset(name=dataset_name, components=components, data=None)
+     if "scalars" in structures:
+         for scalar_json in structures["scalars"]:
+             scalar_name = scalar_json["name"]
+             scalar = Scalar(
+                 name=scalar_name, data_type=SCALAR_TYPES[scalar_json["type"]], value=None
+             )
+             datasets[scalar_name] = scalar  # type: ignore[assignment]
      return datasets
 
 
- def _load_single_datapoint(datapoint: Union[str, Path]):
+ def _load_single_datapoint(datapoint: Union[str, Path]) -> Dict[str, Any]:
      """
      Returns a dict with the data given from one dataset.
      """
      if not isinstance(datapoint, (Path, str)):
-         raise Exception('Invalid datapoint. Input must be a Path or an S3 URI')
+         raise Exception("Invalid datapoint. Input must be a Path or an S3 URI")
      if isinstance(datapoint, str):
-         if 's3://' in datapoint:
+         if "s3://" in datapoint:
              # Handling S3 URI
              s3fs_obj = S3FileSystem()
 
              # Check if the S3 URI is valid
              if not s3fs_obj.exists(datapoint):
                  raise Exception(
-                     f'Invalid datapoint. S3 URI does not exist or it is not accessible: {datapoint}')
+                     f"Invalid datapoint. S3 URI does not exist or it is not accessible: {datapoint}"
+                 )
 
              # Check if the S3 URI is a directory
              if s3fs_obj.isdir(datapoint):
-                 datapoints = {}
+                 datapoints: Dict[str, Any] = {}
                  for f in s3fs_obj.ls(datapoint):
-                     if f.endswith('.csv'):
-                         dataset_name = f.split('/')[-1].removesuffix('.csv')
+                     if f.endswith(".csv"):
+                         dataset_name = f.split("/")[-1].removesuffix(".csv")
                          dict_data = {dataset_name: f"s3://{f}"}
                          datapoints = {**datapoints, **dict_data}
                  return datapoints
 
              # Check if the S3 URI is a csv file
-             if s3fs_obj.isfile(datapoint) and not datapoint.endswith('.csv'):
-                 raise Exception(f'Invalid datapoint. S3 URI must refer to a csv file: {datapoint}')
-             dataset_name = datapoint.split('/')[-1].removesuffix('.csv')
+             if s3fs_obj.isfile(datapoint) and not datapoint.endswith(".csv"):
+                 raise Exception(f"Invalid datapoint. S3 URI must refer to a csv file: {datapoint}")
+             dataset_name = datapoint.split("/")[-1].removesuffix(".csv")
              dict_data = {dataset_name: datapoint}
              return dict_data
 
          try:
              datapoint = Path(datapoint)
          except Exception:
-             raise Exception('Invalid datapoint. Input must refer to a Path or an S3 URI')
+             raise Exception("Invalid datapoint. Input must refer to a Path or an S3 URI")
      if datapoint.is_dir():
          datapoints = {}
          for f in datapoint.iterdir():
-             if f.suffix != '.csv':
+             if f.suffix != ".csv":
                  continue
              dp = _load_single_datapoint(f)
              datapoints = {**datapoints, **dp}
          dict_data = datapoints
      else:
-         dataset_name = datapoint.name.removesuffix('.csv')
-         dict_data = {dataset_name: datapoint}
+         dataset_name = datapoint.name.removesuffix(".csv")
+         dict_data = {dataset_name: datapoint}  # type: ignore[dict-item]
      return dict_data
 
 
- def _load_datapoints_path(datapoints: Union[Path, str, List[Union[str, Path]]]):
+ def _load_datapoints_path(
+     datapoints: Union[Path, str, List[Union[str, Path]]]
+ ) -> Dict[str, Dataset]:
      """
      Returns a dict with the data given from a Path.
      """
      if isinstance(datapoints, list):
-         dict_datapoints = {}
+         dict_datapoints: Dict[str, Any] = {}
          for x in datapoints:
              result = _load_single_datapoint(x)
              dict_datapoints = {**dict_datapoints, **result}
@@ -114,40 +122,53 @@ def _load_datapoints_path(datapoints: Union[Path, str, List[Union[str, Path]]]):
      return _load_single_datapoint(datapoints)
 
 
- def _load_datastructure_single(data_structure: Union[dict, Path]):
+ def _load_datastructure_single(data_structure: Union[Dict[str, Any], Path]) -> Dict[str, Dataset]:
      """
      Loads a single data structure.
      """
      if isinstance(data_structure, dict):
          return _load_dataset_from_structure(data_structure)
      if not isinstance(data_structure, Path):
-         raise Exception('Invalid datastructure. Input must be a dict or Path object')
+         raise Exception("Invalid datastructure. Input must be a dict or Path object")
      if not data_structure.exists():
-         raise Exception('Invalid datastructure. Input does not exist')
+         raise Exception("Invalid datastructure. Input does not exist")
      if data_structure.is_dir():
-         datasets = {}
+         datasets: Dict[str, Any] = {}
          for f in data_structure.iterdir():
-             if f.suffix != '.json':
+             if f.suffix != ".json":
                  continue
              dataset = _load_datastructure_single(f)
              datasets = {**datasets, **dataset}
          return datasets
      else:
-         if data_structure.suffix != '.json':
-             raise Exception('Invalid datastructure. Must have .json extension')
-         with open(data_structure, 'r') as file:
+         if data_structure.suffix != ".json":
+             raise Exception("Invalid datastructure. Must have .json extension")
+         with open(data_structure, "r") as file:
              structures = json.load(file)
          return _load_dataset_from_structure(structures)
 
 
- def load_datasets(data_structure: Union[dict, Path, List[Union[dict, Path]]]):
+ def load_datasets(
+     data_structure: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]]
+ ) -> Dict[str, Dataset]:
      """
      Loads multiple datasets.
+ 
+     Args:
+         data_structure: Dict, Path or a List of dicts or Paths.
+ 
+     Returns:
+         The datastructure as a dict or a list of datastructures as dicts. \
+         These dicts will have as keys the name, role, \
+         type and nullable of the data contained in the dataset.
+ 
+     Raises:
+         Exception: If the Path is invalid or datastructure has a wrong format.
      """
      if isinstance(data_structure, dict):
          return _load_datastructure_single(data_structure)
      if isinstance(data_structure, list):
-         ds_structures = {}
+         ds_structures: Dict[str, Any] = {}
          for x in data_structure:
              result = _load_datastructure_single(x)
              ds_structures = {**ds_structures, **result}  # Overwrite ds_structures dict.
@@ -155,11 +176,19 @@ def load_datasets(data_structure: Union[dict, Path, List[Union[dict, Path]]]):
      return _load_datastructure_single(data_structure)
 
 
- def load_datasets_with_data(data_structures: Union[dict, Path, List[Union[dict, Path]]],
-                             datapoints: Optional[Union[dict, Path, List[Path]]] = None):
+ def load_datasets_with_data(data_structures: Any, datapoints: Optional[Any] = None) -> Any:
      """
-     Loads the dataset structures and fills them with the data contained in the datapoints. Returns a dict with the
-     structure and a pandas dataframe.
+     Loads the dataset structures and fills them with the data contained in the datapoints.
+ 
+     Args:
+         data_structures: Dict, Path or a List of dicts or Paths.
+         datapoints: Dict, Path or a List of Paths.
+ 
+     Returns:
+         A dict with the structure and a pandas dataframe with the data.
+ 
+     Raises:
+         Exception: If the Path is wrong or the file is invalid.
      """
      datasets = load_datasets(data_structures)
      if datapoints is None:
@@ -172,11 +201,14 @@ def load_datasets_with_data(data_structures: Union[dict, Path, List[Union[dict,
          for dataset_name, data in datapoints.items():
              if dataset_name not in datasets:
                  raise Exception(f"Not found dataset {dataset_name}")
-             datasets[dataset_name].data = _validate_pandas(datasets[dataset_name].components, data)
+             datasets[dataset_name].data = _validate_pandas(
+                 datasets[dataset_name].components, data, dataset_name
+             )
          for dataset_name in datasets:
              if datasets[dataset_name].data is None:
                  datasets[dataset_name].data = pd.DataFrame(
-                     columns=list(datasets[dataset_name].components.keys()))
+                     columns=list(datasets[dataset_name].components.keys())
+                 )
          return datasets, None
      # Handling dictionary of paths
      dict_datapoints = _load_datapoints_path(datapoints)
@@ -187,69 +219,88 @@ def load_datasets_with_data(data_structures: Union[dict, Path, List[Union[dict,
      return datasets, dict_datapoints
 
 
- def load_vtl(input: Union[str, Path]):
+ def load_vtl(input: Union[str, Path]) -> str:
      """
      Reads the vtl expression.
 
-     :param input: String or Path of the vtl expression.
+     Args:
+         input: String or Path of the vtl expression.
+ 
+     Returns:
+         If it is a string, it will return the input as a string. \
+         If it is a Path, it will return the expression contained in the file as a string.
 
-     :return: If it is a string, it will return the input. If it is a Path, it will return the expression contained in
-     the file.
+     Raises:
+         Exception: If the vtl does not exist, if the Path is wrong, or if it is not a vtl file.
      """
      if isinstance(input, str):
-         return input
+         if os.path.exists(input):
+             input = Path(input)
+         else:
+             return input
      if not isinstance(input, Path):
-         raise Exception('Invalid vtl file. Input is not a Path object')
+         raise Exception("Invalid vtl file. Input is not a Path object")
      if not input.exists():
-         raise Exception('Invalid vtl file. Input does not exist')
-     if input.suffix != '.vtl':
-         raise Exception('Invalid vtl file. Must have .vtl extension')
-     with open(input, 'r') as f:
+         raise Exception("Invalid vtl file. Input does not exist")
+     if input.suffix != ".vtl":
+         raise Exception("Invalid vtl file. Must have .vtl extension")
+     with open(input, "r") as f:
          return f.read()
 
 
- def _load_single_value_domain(input: Path):
-     if input.suffix != '.json':
-         raise Exception('Invalid Value Domain file. Must have .json extension')
-     with open(input, 'r') as f:
+ def _load_single_value_domain(input: Path) -> Dict[str, ValueDomain]:
+     if input.suffix != ".json":
+         raise Exception("Invalid Value Domain file. Must have .json extension")
+     with open(input, "r") as f:
          vd = ValueDomain.from_dict(json.load(f))
      return {vd.name: vd}
 
 
- def load_value_domains(input: Union[dict, Path]):
+ def load_value_domains(input: Union[Dict[str, Any], Path]) -> Dict[str, ValueDomain]:
      """
      Loads the value domains.
 
-     :param input: Dict or Path of the json file that contains the value domains data.
+     Args:
+         input: Dict or Path of the json file that contains the value domains data.
 
-     :return: A dictionary with the value domains data.
+     Returns:
+         A dictionary with the value domains data, or a list of dictionaries with them.
+ 
+     Raises:
+         Exception: If the value domains file is wrong, the Path is invalid, \
+         or the value domains file does not exist.
      """
      if isinstance(input, dict):
          vd = ValueDomain.from_dict(input)
          return {vd.name: vd}
      if not isinstance(input, Path):
-         raise Exception('Invalid vd file. Input is not a Path object')
+         raise Exception("Invalid vd file. Input is not a Path object")
      if not input.exists():
-         raise Exception('Invalid vd file. Input does not exist')
+         raise Exception("Invalid vd file. Input does not exist")
      if input.is_dir():
-         value_domains = {}
+         value_domains: Dict[str, Any] = {}
          for f in input.iterdir():
              vd = _load_single_value_domain(f)
              value_domains = {**value_domains, **vd}
          return value_domains
-     if input.suffix != '.json':
-         raise Exception('Invalid vd file. Must have .json extension')
+     if input.suffix != ".json":
+         raise Exception("Invalid vd file. Must have .json extension")
      return _load_single_value_domain(input)
 
 
- def load_external_routines(input: Union[dict, Path]) -> Optional[
-     Dict[str, ExternalRoutine]]:
+ def load_external_routines(input: Union[Dict[str, Any], Path, str]) -> Any:
      """
      Load the external routines.
 
-     :param input: Dict or Path of the sql file that contains the external routine data.
+     Args:
+         input: Dict or Path of the sql file that contains the external routine data.
+ 
+     Returns:
+         A dictionary with the external routine data, or a list with \
+         the dictionaries from the Path given.
 
-     :return: A dictionary with the external routine data.
+     Raises:
+         Exception: If the sql file does not exist, the Path is wrong, or the file is not a sql one.
      """
      external_routines = {}
      if isinstance(input, dict):
@@ -258,12 +309,12 @@ def load_external_routines(input: Union[dict, Path]) -> Optional[
              external_routines[ext_routine.name] = ext_routine
          return external_routines
      if not isinstance(input, Path):
-         raise Exception('Input invalid. Input must be a sql file.')
+         raise Exception("Input invalid. Input must be a sql file.")
      if not input.exists():
-         raise Exception('Input invalid. Input does not exist')
+         raise Exception("Input invalid. Input does not exist")
      if input.is_dir():
          for f in input.iterdir():
-             if f.suffix != '.sql':
+             if f.suffix != ".sql":
                  continue
              ext_rout = _load_single_external_routine_from_file(f)
              external_routines[ext_rout.name] = ext_rout
@@ -273,53 +324,59 @@ def load_external_routines(input: Union[dict, Path]) -> Optional[
      return external_routines
 
 
- def _return_only_persistent_datasets(datasets: Dict[str, Dataset], ast: Start):
+ def _return_only_persistent_datasets(
+     datasets: Dict[str, Dataset], ast: Start
+ ) -> Dict[str, Dataset]:
      """
      Returns only the datasets with a persistent assignment.
      """
      persistent = []
      for child in ast.children:
-         if isinstance(child, PersistentAssignment):
+         if isinstance(child, PersistentAssignment) and hasattr(child.left, "value"):
              persistent.append(child.left.value)
-     return {dataset.name: dataset for dataset in datasets.values() if
-             isinstance(dataset, Dataset) and dataset.name in persistent}
+     return {dataset.name: dataset for dataset in datasets.values() if dataset.name in persistent}
 
 
- def _load_single_external_routine_from_file(input: Path):
+ def _load_single_external_routine_from_file(input: Path) -> Any:
      """
      Returns a single external routine.
      """
      if not isinstance(input, Path):
-         raise Exception('Input invalid')
+         raise Exception("Input invalid")
      if not input.exists():
-         raise Exception('Input does not exist')
-     if input.suffix != '.sql':
-         raise Exception('Input must be a sql file')
-     with open(input, 'r') as f:
-         ext_rout = ExternalRoutine.from_sql_query(input.name.removesuffix('.sql'), f.read())
+         raise Exception("Input does not exist")
+     if input.suffix != ".sql":
+         raise Exception("Input must be a sql file")
+     with open(input, "r") as f:
+         ext_rout = ExternalRoutine.from_sql_query(input.name.removesuffix(".sql"), f.read())
      return ext_rout
 
 
- def _check_output_folder(output_folder: Union[str, Path]):
+ def _check_output_folder(output_folder: Union[str, Path]) -> None:
      """
      Check if the output folder exists. If not, it will create it.
      """
      if isinstance(output_folder, str):
-         if 's3://' in output_folder:
+         if "s3://" in output_folder:
              s3fs_obj = S3FileSystem()
              # Check if the S3 URI is valid
              if not s3fs_obj.exists(output_folder):
                  try:
                      s3fs_obj.mkdir(output_folder)
                  except Exception:
-                     raise Exception(f'Invalid output folder. S3 URI is invalid or it is not accessible: {output_folder}')
+                     raise Exception(
+                         f"Invalid output folder. S3 URI is invalid or "
+                         f"it is not accessible: {output_folder}"
+                     )
              return
          try:
              output_folder = Path(output_folder)
          except Exception:
-             raise Exception('Output folder must be a Path or S3 URI to a directory')
+             raise Exception("Output folder must be a Path or S3 URI to a directory")
 
-     if not isinstance(output_folder, Path) or not output_folder.is_dir():
-         raise Exception('Output folder must be a Path or S3 URI to a directory')
+     if not isinstance(output_folder, Path):
+         raise Exception("Output folder must be a Path or S3 URI to a directory")
      if not output_folder.exists():
+         if output_folder.suffix != "":
+             raise Exception("Output folder must be a Path or S3 URI to a directory")
          os.mkdir(output_folder)
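
For orientation, the functions changed in this file are vtlengine's loading helpers. A minimal usage sketch against the 1.0.2 signatures shown above; the file names and directory layout below are hypothetical, and the helpers are imported directly from the module changed here (vtlengine/API/_InternalApi.py):

  from pathlib import Path

  from vtlengine.API._InternalApi import load_datasets_with_data, load_vtl

  # Hypothetical inputs: a data-structure JSON and a datapoints CSV.
  structure = Path("data/DataStructure/input/DS_1.json")
  datapoints = Path("data/DataSet/input/DS_1.csv")

  # As of 1.0.2, load_vtl also resolves a string that points to an existing
  # .vtl file (it checks os.path.exists); a plain expression is returned as-is.
  script = load_vtl("DS_r := DS_1 + 1;")

  # With Path datapoints, returns the Dataset structures plus a mapping of
  # dataset names to datapoint paths (see the return at the top of the last hunk).
  datasets, dict_datapoints = load_datasets_with_data(structure, datapoints)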