vtlengine 1.0-py3-none-any.whl → 1.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +159 -102
- vtlengine/API/__init__.py +110 -68
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +402 -205
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +248 -104
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +24 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/Vtl.g4 +49 -20
- vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
- vtlengine/AST/Grammar/lexer.py +2012 -1312
- vtlengine/AST/Grammar/parser.py +7524 -4343
- vtlengine/AST/Grammar/tokens.py +140 -128
- vtlengine/AST/VtlVisitor.py +16 -5
- vtlengine/AST/__init__.py +41 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +196 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +96 -27
- vtlengine/Exceptions/messages.py +149 -69
- vtlengine/Interpreter/__init__.py +817 -497
- vtlengine/Model/__init__.py +172 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +167 -79
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +290 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +129 -46
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +467 -215
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +232 -41
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +79 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +48 -37
- vtlengine-1.0.2.dist-info/METADATA +245 -0
- vtlengine-1.0.2.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/LICENSE.md +0 -0
vtlengine/Utils/__init__.py
CHANGED
@@ -1,34 +1,222 @@
-from
+from typing import Any, Dict
+
+from vtlengine.Operators.Aggregation import (
+    Avg,
+    Count,
+    Max,
+    Median,
+    Min,
+    PopulationStandardDeviation,
+    PopulationVariance,
+    SampleStandardDeviation,
+    SampleVariance,
+    Sum,
+)
+from vtlengine.Operators.Analytic import (
+    Avg as AvgAnalytic,
+    Count as CountAnalytic,
+    FirstValue,
+    Lag,
+    LastValue,
+    Lead,
+    Max as MaxAnalytic,
+    Median as MedianAnalytic,
+    Min as MinAnalytic,
+    PopulationStandardDeviation as PopulationStandardDeviationAnalytic,
+    PopulationVariance as PopulationVarianceAnalytic,
+    Rank,
+    RatioToReport,
+    SampleStandardDeviation as SampleStandardDeviationAnalytic,
+    SampleVariance as SampleVarianceAnalytic,
+    Sum as SumAnalytic,
+)
 from vtlengine.Operators.Boolean import And, Not, Or, Xor
-from vtlengine.Operators.Clause import
+from vtlengine.Operators.Clause import (
+    Aggregate,
+    Calc,
+    Drop,
+    Filter,
+    Keep,
+    Pivot,
+    Rename,
+    Sub,
+    Unpivot,
+)
+from vtlengine.Operators.Comparison import (
+    Equal,
+    Greater,
+    GreaterEqual,
+    In,
+    IsNull,
+    Less,
+    LessEqual,
+    NotEqual,
+    NotIn,
+    Match,
+)
 from vtlengine.Operators.Conditional import Nvl
 from vtlengine.Operators.General import Alias, Membership
-from vtlengine.Operators.HROperators import
+from vtlengine.Operators.HROperators import (
+    HREqual,
+    HRGreater,
+    HRGreaterEqual,
+    HRLess,
+    HRLessEqual,
+    HRBinPlus,
+    HRBinMinus,
+    HRUnPlus,
+    HRUnMinus,
+)
 from vtlengine.Operators.Join import Apply, CrossJoin, FullJoin, InnerJoin, LeftJoin
-from vtlengine.Operators.Numeric import
+from vtlengine.Operators.Numeric import (
+    AbsoluteValue,
+    BinMinus,
+    BinPlus,
+    Ceil,
+    Div,
+    Exponential,
+    Floor,
+    Logarithm,
+    Modulo,
+    Mult,
+    NaturalLogarithm,
+    Power,
+    Round,
+    SquareRoot,
+    Trunc,
+    UnMinus,
+    UnPlus,
+    Random,
+)
 from vtlengine.Operators.RoleSetter import Attribute, Identifier, Measure
 from vtlengine.Operators.Set import Intersection, Setdiff, Symdiff, Union
-from vtlengine.Operators.String import
+from vtlengine.Operators.String import (
+    Concatenate,
+    Length,
+    Lower,
+    Ltrim,
+    Replace,
+    Rtrim,
+    Substr,
+    Trim,
+    Upper,
+)
+from vtlengine.Operators.Time import (
+    Flow_to_stock,
+    Period_indicator,
+    Stock_to_flow,
+    Fill_time_series,
+    Time_Shift,
+    Year,
+    Month,
+    Day_of_Month,
+    Day_of_Year,
+    Day_to_Year,
+    Day_to_Month,
+    Year_to_Day,
+    Month_to_Day,
+    Date_Diff,
+    Date_Add,
+)
 
-from vtlengine.AST.Grammar.tokens import
+from vtlengine.AST.Grammar.tokens import (
+    MEMBERSHIP,
+    AND,
+    OR,
+    XOR,
+    EQ,
+    NEQ,
+    GT,
+    GTE,
+    LT,
+    LTE,
+    IN,
+    NOT_IN,
+    NVL,
+    PLUS,
+    MINUS,
+    MULT,
+    LOG,
+    MOD,
+    POWER,
+    DIV,
+    AS,
+    CONCAT,
+    TIMESHIFT,
+    CHARSET_MATCH,
+    NOT,
+    ABS,
+    EXP,
+    LN,
+    SQRT,
+    CEIL,
+    FLOOR,
+    ISNULL,
+    PERIOD_INDICATOR,
+    LEN,
+    LCASE,
+    LTRIM,
+    RTRIM,
+    TRIM,
+    UCASE,
+    FLOW_TO_STOCK,
+    STOCK_TO_FLOW,
+    ROUND,
+    TRUNC,
+    SUBSTR,
+    REPLACE,
+    FILL_TIME_SERIES,
+    IDENTIFIER,
+    ATTRIBUTE,
+    MEASURE,
+    CALC,
+    FILTER,
+    KEEP,
+    DROP,
+    RENAME,
+    PIVOT,
+    UNPIVOT,
+    SUBSPACE,
+    AGGREGATE,
+    APPLY,
+    UNION,
+    INTERSECT,
+    SYMDIFF,
+    SETDIFF,
+    MAX,
+    MIN,
+    SUM,
+    COUNT,
+    AVG,
+    MEDIAN,
+    STDDEV_POP,
+    STDDEV_SAMP,
+    VAR_POP,
+    VAR_SAMP,
+    LAG,
+    LEAD,
+    FIRST_VALUE,
+    LAST_VALUE,
+    RATIO_TO_REPORT,
+    RANK,
+    INNER_JOIN,
+    LEFT_JOIN,
+    FULL_JOIN,
+    CROSS_JOIN,
+    RANDOM,
+    DAYOFYEAR,
+    DAYOFMONTH,
+    MONTH,
+    YEAR,
+    DAYTOYEAR,
+    DAYTOMONTH,
+    YEARTODAY,
+    MONTHTODAY,
+    DATE_DIFF,
+    DATE_ADD,
+)
 
-BINARY_MAPPING = {
+BINARY_MAPPING: Dict[Any, Any] = {
     # General
     MEMBERSHIP: Membership,
     # Boolean
@@ -54,13 +242,15 @@ BINARY_MAPPING = {
     MOD: Modulo,
     POWER: Power,
     DIV: Div,
+    RANDOM: Random,
     # General
     AS: Alias,
     # String
     CONCAT: Concatenate,
     # Time
     TIMESHIFT: Time_Shift,
-    CHARSET_MATCH: Match
+    CHARSET_MATCH: Match,
+    DATE_DIFF: Date_Diff,
 }
 
 UNARY_MAPPING = {
@@ -88,7 +278,15 @@ UNARY_MAPPING = {
     # Time
     PERIOD_INDICATOR: Period_indicator,
     FLOW_TO_STOCK: Flow_to_stock,
-    STOCK_TO_FLOW: Stock_to_flow
+    STOCK_TO_FLOW: Stock_to_flow,
+    YEAR: Year,
+    MONTH: Month,
+    DAYOFMONTH: Day_of_Month,
+    DAYOFYEAR: Day_of_Year,
+    DAYTOYEAR: Day_to_Year,
+    DAYTOMONTH: Day_to_Month,
+    YEARTODAY: Year_to_Day,
+    MONTHTODAY: Month_to_Day,
 }
 
 PARAMETRIC_MAPPING = {
@@ -100,6 +298,7 @@ PARAMETRIC_MAPPING = {
     REPLACE: Replace,
     # Time
     FILL_TIME_SERIES: Fill_time_series,
+    DATE_ADD: Date_Add,
 }
 
 ROLE_SETTER_MAPPING = {
@@ -118,15 +317,10 @@ REGULAR_AGGREGATION_MAPPING = {
     UNPIVOT: Unpivot,
     SUBSPACE: Sub,
     AGGREGATE: Aggregate,
-    APPLY: Apply
+    APPLY: Apply,
 }
 
-SET_MAPPING = {
-    UNION: Union,
-    INTERSECT: Intersection,
-    SYMDIFF: Symdiff,
-    SETDIFF: Setdiff
-}
+SET_MAPPING = {UNION: Union, INTERSECT: Intersection, SYMDIFF: Symdiff, SETDIFF: Setdiff}
 
 AGGREGATION_MAPPING = {
     MAX: Max,
@@ -139,7 +333,6 @@ AGGREGATION_MAPPING = {
     STDDEV_SAMP: SampleStandardDeviation,
     VAR_POP: PopulationVariance,
     VAR_SAMP: SampleVariance,
-
 }
 
 ANALYTIC_MAPPING = {
@@ -158,18 +351,16 @@ ANALYTIC_MAPPING = {
     FIRST_VALUE: FirstValue,
     LAST_VALUE: LastValue,
     RATIO_TO_REPORT: RatioToReport,
-    RANK: Rank
+    RANK: Rank,
 }
 
-THEN_ELSE = {
-    'then': 'T',
-    'else': 'E'
-}
+THEN_ELSE = {"then": "T", "else": "E"}
+
 JOIN_MAPPING = {
     INNER_JOIN: InnerJoin,
     LEFT_JOIN: LeftJoin,
     FULL_JOIN: FullJoin,
-    CROSS_JOIN: CrossJoin
+    CROSS_JOIN: CrossJoin,
 }
 
 HR_COMP_MAPPING = {
@@ -190,7 +381,7 @@ HR_NUM_BINARY_MAPPING = {
 HR_UNARY_MAPPING = {
     # Numeric
     PLUS: HRUnPlus,
-    MINUS: HRUnMinus
+    MINUS: HRUnMinus,
 }
 
 HA_COMP_MAPPING = {
@@ -211,5 +402,5 @@ HA_NUM_BINARY_MAPPING = {
 HA_UNARY_MAPPING = {
     # Numeric
     PLUS: HRUnPlus,
-    MINUS: HRUnMinus
+    MINUS: HRUnMinus,
 }
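These module-level tables drive the interpreter's token dispatch: each VTL grammar token is bound to the operator class that implements it, and 1.0.2 extends them with RANDOM and DATE_DIFF in BINARY_MAPPING, the date-part extractors (YEAR, MONTH, DAYOFMONTH, and the rest) in UNARY_MAPPING, and DATE_ADD in PARAMETRIC_MAPPING. A minimal sketch of the lookup pattern such tables enable follows; the dispatch() helper and the Sum stub are hypothetical stand-ins, not vtlengine's API.

# Hypothetical token-to-operator dispatch; only the table idea comes from
# the diff above, every name below is a local stand-in.
from typing import Any, Dict, List

class Sum:
    @classmethod
    def evaluate(cls, operand: List[float]) -> float:
        return sum(operand)

AGGREGATION_MAPPING: Dict[str, Any] = {"sum": Sum}

def dispatch(token: str, operand: List[float]) -> float:
    # Look the grammar token up and delegate to the bound operator class.
    if token not in AGGREGATION_MAPPING:
        raise NotImplementedError(f"no operator bound to token {token!r}")
    return AGGREGATION_MAPPING[token].evaluate(operand)

print(dispatch("sum", [1.0, 2.0, 3.0]))  # 6.0

Keeping the tables as plain dicts means supporting a new operator, as this release does for date arithmetic, is a one-line change per token.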
vtlengine/__init__.py
CHANGED
@@ -1,16 +1,25 @@
 from pathlib import Path
-
+from typing import Optional, Union
+
+import pandas as pd
 
 from vtlengine.Model import Dataset
-from vtlengine.files.output._time_period_representation import
-    format_time_period_external_representation,
+from vtlengine.files.output._time_period_representation import (
+    format_time_period_external_representation,
+    TimePeriodRepresentation,
+)
+
 
+def save_datapoints(
+    time_period_representation: Optional[TimePeriodRepresentation],
+    dataset: Dataset,
+    output_path: Union[str, Path],
+) -> None:
 
-
-
+    if dataset.data is None:
+        dataset.data = pd.DataFrame()
     if time_period_representation is not None:
         format_time_period_external_representation(dataset, time_period_representation)
-
     if isinstance(output_path, str):
         if output_path.endswith("/"):
             s3_file_output = output_path + f"{dataset.name}.csv"
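The reworked save_datapoints above also gains a None guard: a Dataset whose data payload is missing is written as an empty frame instead of failing. A stand-alone sketch of that guard, using a hypothetical _Dataset class in place of vtlengine.Model.Dataset:

import pandas as pd

class _Dataset:
    # Hypothetical stand-in for vtlengine.Model.Dataset, for this sketch only.
    def __init__(self) -> None:
        self.name = "DS_1"
        self.data = None

ds = _Dataset()
if ds.data is None:  # the 1.0.2 guard: never write from a None payload
    ds.data = pd.DataFrame()
print(ds.data.empty)  # True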
vtlengine/files/output/_time_period_representation.py
CHANGED

@@ -8,23 +8,24 @@ from vtlengine.Model import Dataset, Scalar
 
 class TimePeriodRepresentation(Enum):
     # Time Period output format
-    SDMX_GREGORIAN =
-    SDMX_REPORTING =
-    VTL =
+    SDMX_GREGORIAN = "sdmx_gregorian"
+    SDMX_REPORTING = "sdmx_reporting"
+    VTL = "vtl"
 
     @classmethod
-    def check_value(cls, value: str):
+    def check_value(cls, value: str) -> "TimePeriodRepresentation":
         if value not in cls._value2member_map_:
             raise Exception("Invalid Time Period Representation")
         return cls(value)
 
 
-def _format_vtl_representation(value: str):
+def _format_vtl_representation(value: str) -> str:
     return TimePeriodHandler(value).vtl_representation()
 
 
-def format_time_period_external_representation(
+def format_time_period_external_representation(
+    dataset: Dataset | Scalar, mode: TimePeriodRepresentation
+) -> None:
     """
     From SDMX time period representation to standard VTL representation (no hyphen).
     'A': 'nothing to do',
@@ -48,7 +49,7 @@ def format_time_period_external_representation(dataset: Dataset | Scalar,
     for comp in dataset.components.values():
         if comp.data_type == TimePeriod:
             dataset.data[comp.name] = dataset.data[comp.name].map(
-                _format_vtl_representation,
+                _format_vtl_representation, na_action="ignore"
+            )
 
     return
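With the explicit return annotation, check_value reads as a validating constructor: it either returns the matching member or raises. A self-contained replica of the pattern above (a local copy for illustration, not an import from vtlengine):

from enum import Enum

class TimePeriodRepresentation(Enum):
    SDMX_GREGORIAN = "sdmx_gregorian"
    SDMX_REPORTING = "sdmx_reporting"
    VTL = "vtl"

    @classmethod
    def check_value(cls, value: str) -> "TimePeriodRepresentation":
        # _value2member_map_ is Enum's internal value -> member index.
        if value not in cls._value2member_map_:
            raise Exception("Invalid Time Period Representation")
        return cls(value)

print(TimePeriodRepresentation.check_value("vtl"))  # TimePeriodRepresentation.VTL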
vtlengine/files/parser/__init__.py
CHANGED

@@ -1,12 +1,22 @@
+import warnings
 from csv import DictReader
 from pathlib import Path
-
-from typing import Optional, Dict, Union
+
+from typing import Optional, Dict, Union, Any, Type, List
 
 import numpy as np
 import pandas as pd
-from vtlengine.DataTypes import
+from vtlengine.DataTypes import (
+    Date,
+    TimePeriod,
+    TimeInterval,
+    Integer,
+    Number,
+    Boolean,
+    Duration,
+    SCALAR_TYPES_CLASS_REVERSE,
+    ScalarType,
+)
 from vtlengine.DataTypes.TimeHandling import DURATION_MAPPING
 from vtlengine.files.parser._rfc_dialect import register_rfc
 from vtlengine.files.parser._time_checking import check_date, check_time_period, check_time
@@ -14,14 +24,14 @@ from vtlengine.files.parser._time_checking import check_date, check_time_period, check_time
 from vtlengine.Exceptions import InputValidationException, SemanticError
 from vtlengine.Model import Component, Role, Dataset
 
-TIME_CHECKS_MAPPING = {
+TIME_CHECKS_MAPPING: Dict[Type[ScalarType], Any] = {
     Date: check_date,
     TimePeriod: check_time_period,
-    TimeInterval: check_time
+    TimeInterval: check_time,
 }
 
 
-def _validate_csv_path(components: Dict[str, Component], csv_path: Path):
+def _validate_csv_path(components: Dict[str, Component], csv_path: Path) -> None:
     # GE1 check if the file is empty
     if not csv_path.exists():
         raise Exception(f"Path {csv_path} does not exist.")
@@ -29,8 +39,8 @@ def _validate_csv_path(components: Dict[str, Component], csv_path: Path):
         raise Exception(f"Path {csv_path} is not a file.")
     register_rfc()
     try:
-        with open(csv_path,
-            reader = DictReader(f, dialect=
+        with open(csv_path, "r") as f:
+            reader = DictReader(f, dialect="rfc")
             csv_columns = reader.fieldnames
 
     except UnicodeDecodeError as error:
@@ -45,21 +55,24 @@ def _validate_csv_path(components: Dict[str, Component], csv_path: Path):
         ) from None
 
     if not csv_columns:
-        raise InputValidationException(code=
+        raise InputValidationException(code="0-1-1-6", file=csv_path)
 
     if len(list(set(csv_columns))) != len(csv_columns):
         duplicates = list(set([item for item in csv_columns if csv_columns.count(item) > 1]))
         raise Exception(f"Duplicated columns {', '.join(duplicates)} found in file.")
 
     comp_names = set([c.name for c in components.values() if c.role == Role.IDENTIFIER])
-    comps_missing
+    comps_missing: Union[str, List[str]] = (
+        [id_m for id_m in comp_names if id_m not in reader.fieldnames] if reader.fieldnames else []
+    )
     if comps_missing:
         comps_missing = ", ".join(comps_missing)
-        raise InputValidationException(code=
+        raise InputValidationException(code="0-1-1-8", ids=comps_missing, file=str(csv_path.name))
 
 
-def _sanitize_pandas_columns(
+def _sanitize_pandas_columns(
+    components: Dict[str, Component], csv_path: Union[str, Path], data: pd.DataFrame
+) -> pd.DataFrame:
     # Fast loading from SDMX-CSV
     if "DATAFLOW" in data.columns and data.columns[0] == "DATAFLOW":
         if "DATAFLOW" not in components:
@@ -75,11 +88,11 @@ def _sanitize_pandas_columns(components: Dict[str, Component],
 
     # Validate identifiers
     comp_names = set([c.name for c in components.values() if c.role == Role.IDENTIFIER])
-    comps_missing = [id_m for id_m in comp_names if id_m not in data.columns]
+    comps_missing: Union[str, List[str]] = [id_m for id_m in comp_names if id_m not in data.columns]
     if comps_missing:
         comps_missing = ", ".join(comps_missing)
         file = csv_path if isinstance(csv_path, str) else csv_path.name
-        raise InputValidationException(code=
+        raise InputValidationException(code="0-1-1-7", ids=comps_missing, file=file)
 
     # Fill rest of components with null values
     for comp_name, comp in components.items():
@@ -94,47 +107,54 @@ def _pandas_load_csv(components: Dict[str, Component], csv_path: Path) -> pd.DataFrame:
     obj_dtypes = {comp_name: np.object_ for comp_name, comp in components.items()}
 
     try:
-        data = pd.read_csv(
-    except UnicodeDecodeError as error:
+        data = pd.read_csv(
+            csv_path, dtype=obj_dtypes, engine="c", keep_default_na=False, na_values=[""]
+        )
+    except UnicodeDecodeError:
         raise InputValidationException(code="0-1-2-5", file=csv_path.name)
 
     return _sanitize_pandas_columns(components, csv_path, data)
 
+
 def _pandas_load_s3_csv(components: Dict[str, Component], csv_path: str) -> pd.DataFrame:
     obj_dtypes = {comp_name: np.object_ for comp_name, comp in components.items()}
 
     # start = time()
     try:
-        data = pd.read_csv(
-            na_values=[''])
+        data = pd.read_csv(
+            csv_path, dtype=obj_dtypes, engine="c", keep_default_na=False, na_values=[""]
+        )
 
-    except UnicodeDecodeError
+    except UnicodeDecodeError:
         raise InputValidationException(code="0-1-2-5", file=csv_path)
     except Exception as e:
         raise InputValidationException(f"ERROR: {str(e)}, review file {str(csv_path)}")
-
-    # print(f"Data loaded from {csv_path}, shape: {data.shape}")
-    # end = time()
-    # print(f"Time to load data from s3 URI: {end - start}")
-
     return _sanitize_pandas_columns(components, csv_path, data)
 
 
+def _parse_boolean(value: str) -> bool:
+    if isinstance(value, bool):
+        return value
     if value.lower() == "true" or value == "1":
         return True
     return False
 
 
-def _validate_pandas(
+def _validate_pandas(
+    components: Dict[str, Component], data: pd.DataFrame, dataset_name: str
+) -> pd.DataFrame:
+    warnings.filterwarnings("ignore", category=FutureWarning)
     # Identifier checking
+
     id_names = [comp_name for comp_name, comp in components.items() if comp.role == Role.IDENTIFIER]
 
+    missing_columns = [name for name in components.keys() if name not in data.columns.tolist()]
+    if missing_columns:
+        for name in missing_columns:
+            if components[name].nullable is False:
+                raise SemanticError("0-1-1-10", name=dataset_name, comp_name=name)
+            data[name] = None
+
     for id_name in id_names:
         if data[id_name].isnull().any():
             raise SemanticError("0-1-1-4", null_identifier=id_name, name=dataset_name)
@@ -150,35 +170,42 @@ def _validate_pandas(components: Dict[str, Component], data: pd.DataFrame,
 
     for comp_name, comp in components.items():
         if comp.data_type in (Date, TimePeriod, TimeInterval):
-            data[comp_name] = data[comp_name].map(
+            data[comp_name] = data[comp_name].map(
+                TIME_CHECKS_MAPPING[comp.data_type], na_action="ignore"
+            )
         elif comp.data_type == Integer:
-            data[comp_name] = data[comp_name].map(
+            data[comp_name] = data[comp_name].map(
+                lambda x: Integer.cast(float(x)), na_action="ignore"
+            )
         elif comp.data_type == Number:
-            data[comp_name] = data[comp_name].map(lambda x: float(x), na_action=
+            data[comp_name] = data[comp_name].map(lambda x: float(x), na_action="ignore")
         elif comp.data_type == Boolean:
-            data[comp_name] = data[comp_name].map(
+            data[comp_name] = data[comp_name].map(
+                lambda x: _parse_boolean(x), na_action="ignore"
+            )
         elif comp.data_type == Duration:
-            values_correct =
+            values_correct = (
+                data[comp_name]
+                .map(lambda x: x.replace(" ", "") in DURATION_MAPPING, na_action="ignore")
+                .all()
+            )
             if not values_correct:
                 raise ValueError(f"Duration values are not correct in column {comp_name}")
         else:
-            data[comp_name] = data[comp_name].map(
+            data[comp_name] = data[comp_name].map(
+                lambda x: str(x).replace('"', ""), na_action="ignore"
+            )
+            data[comp_name] = data[comp_name].astype(np.object_, errors="raise")
+        except ValueError:
+            str_comp = SCALAR_TYPES_CLASS_REVERSE[comp.data_type] if comp else "Null"
             raise SemanticError("0-1-1-12", name=dataset_name, column=comp_name, type=str_comp)
 
     return data
 
 
-def load_datapoints(
+def load_datapoints(
+    components: Dict[str, Component], dataset_name: str, csv_path: Optional[Union[Path, str]] = None
+) -> pd.DataFrame:
     if csv_path is None or (isinstance(csv_path, Path) and not csv_path.exists()):
         return pd.DataFrame(columns=list(components.keys()))
     elif isinstance(csv_path, str):
@@ -193,5 +220,5 @@ def load_datapoints(components: Dict[str, Component],
     return data
 
 
-def _fill_dataset_empty_data(dataset: Dataset):
+def _fill_dataset_empty_data(dataset: Dataset) -> None:
     dataset.data = pd.DataFrame(columns=list(dataset.components.keys()))
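One idiom recurs through _validate_pandas above: every per-column cast goes through map(..., na_action="ignore"), so null cells bypass the cast and survive as NaN instead of raising. A small pandas sketch of the same idiom, reusing the _parse_boolean logic from the diff:

import numpy as np
import pandas as pd

def _parse_boolean(value) -> bool:
    # Mirrors the parser above: "true"/"1" are truthy, anything else is False.
    if isinstance(value, bool):
        return value
    return value.lower() == "true" or value == "1"

col = pd.Series(["true", "0", np.nan], dtype=object)
print(col.map(_parse_boolean, na_action="ignore").tolist())  # [True, False, nan]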
vtlengine/files/parser/_rfc_dialect.py
CHANGED

@@ -6,9 +6,10 @@ class RFCDialect(csv.Dialect):
     https://docs.python.org/3/library/csv.html#csv.Dialect
     https://tools.ietf.org/html/rfc4180
     """
-
+
+    delimiter = ","
     doublequote = True
-    lineterminator =
+    lineterminator = "\r\n"
     quotechar = '"'
     quoting = csv.QUOTE_MINIMAL
     strict = True
@@ -16,6 +17,6 @@ class RFCDialect(csv.Dialect):
     skipinitialspace = False
 
 
-def register_rfc():
-    """
-    csv.register_dialect(
+def register_rfc() -> None:
+    """Register the RFC dialect."""
+    csv.register_dialect("rfc", RFCDialect)
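Once register_rfc() has run, any reader can request the dialect by name, which is how the DictReader(f, dialect="rfc") call in _validate_csv_path resolves. A self-contained sketch with a local copy of the dialect class:

import csv
import io

class RFCDialect(csv.Dialect):
    # Mirrors the class above: RFC 4180 line endings and minimal quoting.
    delimiter = ","
    doublequote = True
    lineterminator = "\r\n"
    quotechar = '"'
    quoting = csv.QUOTE_MINIMAL
    strict = True
    skipinitialspace = False

csv.register_dialect("rfc", RFCDialect)
rows = list(csv.DictReader(io.StringIO('a,b\r\n1,"x,y"\r\n'), dialect="rfc"))
print(rows)  # [{'a': '1', 'b': 'x,y'}]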