vtlengine 1.1.1.tar.gz → 1.2.1rc1.tar.gz
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/PKG-INFO +4 -4
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/README.md +1 -1
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/pyproject.toml +11 -10
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/API/_InternalApi.py +62 -28
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/API/__init__.py +25 -9
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/ASTConstructorModules/Expr.py +6 -3
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/DAG/__init__.py +34 -5
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/DAG/_words.py +1 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/Grammar/Vtl.g4 +7 -7
- vtlengine-1.2.1rc1/src/vtlengine/AST/Grammar/lexer.py +20785 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/Grammar/parser.py +17996 -3199
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Exceptions/messages.py +5 -2
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Interpreter/__init__.py +50 -7
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/Aggregation.py +8 -3
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/Analytic.py +3 -2
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/CastOperator.py +5 -2
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/Clause.py +26 -18
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/Comparison.py +3 -1
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/Conditional.py +35 -26
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/General.py +3 -1
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/HROperators.py +3 -1
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/Join.py +9 -2
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/Time.py +11 -5
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/Validation.py +5 -2
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/__init__.py +15 -8
- vtlengine-1.2.1rc1/src/vtlengine/Utils/__Virtual_Assets.py +34 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/__init__.py +1 -1
- vtlengine-1.1.1/src/vtlengine/AST/Grammar/lexer.py +0 -2138
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/LICENSE.md +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/API/data/schema/json_schema_2.1.json +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/ASTComment.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/ASTConstructor.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/ASTConstructorModules/ExprComponents.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/ASTConstructorModules/Terminals.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/ASTConstructorModules/__init__.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/ASTDataExchange.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/ASTEncoders.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/ASTString.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/ASTTemplate.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/ASTVisitor.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/Grammar/VtlTokens.g4 +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/Grammar/__init__.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/Grammar/tokens.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/VtlVisitor.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/__init__.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/DataTypes/TimeHandling.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/DataTypes/__init__.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Exceptions/__init__.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Model/__init__.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/Assignment.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/Boolean.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/Numeric.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/RoleSetter.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/Set.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Operators/String.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/Utils/__init__.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/__extras_check.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/files/__init__.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/files/output/__init__.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/files/output/_time_period_representation.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/files/parser/__init__.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/files/parser/_rfc_dialect.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/files/parser/_time_checking.py +0 -0
- {vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/py.typed +0 -0
{vtlengine-1.1.1 → vtlengine-1.2.1rc1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: vtlengine
-Version: 1.1.1
+Version: 1.2.1rc1
 Summary: Run and Validate VTL Scripts
 License: AGPL-3.0
 Keywords: vtl,sdmx,vtlengine,Validation and Transformation Language
@@ -16,7 +16,7 @@ Classifier: Intended Audience :: Science/Research
 Classifier: Typing :: Typed
 Provides-Extra: all
 Provides-Extra: s3
-Requires-Dist: antlr4-python3-runtime (>=4.
+Requires-Dist: antlr4-python3-runtime (>=4.13.2,<4.14)
 Requires-Dist: duckdb (>=1.1,<1.2)
 Requires-Dist: fsspec (>=2022.11.0,<2023.0) ; extra == "all"
 Requires-Dist: fsspec (>=2022.11.0,<2023.0) ; extra == "s3"
@@ -25,7 +25,7 @@ Requires-Dist: networkx (>=2.8,<3.0)
 Requires-Dist: numpy (>=1.23.2,<2) ; python_version < "3.13"
 Requires-Dist: numpy (>=2.1.0) ; python_version >= "3.13"
 Requires-Dist: pandas (>=2.1.4,<3.0)
-Requires-Dist: pysdmx[xml] (>=1.
+Requires-Dist: pysdmx[xml] (>=1.4.0rc1,<2.0)
 Requires-Dist: s3fs (>=2022.11.0,<2023.0) ; extra == "all"
 Requires-Dist: s3fs (>=2022.11.0,<2023.0) ; extra == "s3"
 Requires-Dist: sqlglot (>=22.2.0,<23.0)
@@ -43,7 +43,7 @@ Description-Content-Type: text/markdown
 | Testing | [](https://github.com/Meaningful-Data/vtlengine/actions/workflows/testing.yml) |
 | Package | [](https://pypi.org/project/vtlengine/) |
 | License | [](https://github.com/Meaningful-Data/vtlengine/blob/main/LICENSE.md) |
-| Mentioned in | [](
+| Mentioned in | [](https://github.com/SNStatComp/awesome-official-statistics-software) |
 
 ## Introduction
 
{vtlengine-1.1.1 → vtlengine-1.2.1rc1}/README.md
@@ -5,7 +5,7 @@
 | Testing | [](https://github.com/Meaningful-Data/vtlengine/actions/workflows/testing.yml) |
 | Package | [](https://pypi.org/project/vtlengine/) |
 | License | [](https://github.com/Meaningful-Data/vtlengine/blob/main/LICENSE.md) |
-| Mentioned in | [](
+| Mentioned in | [](https://github.com/SNStatComp/awesome-official-statistics-software) |
 
 ## Introduction
 
{vtlengine-1.1.1 → vtlengine-1.2.1rc1}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "vtlengine"
-version = "1.1.1"
+version = "1.2.1rc1"
 description = "Run and Validate VTL Scripts"
 license = "AGPL-3.0"
 readme = "README.md"
@@ -25,11 +25,11 @@ keywords = ['vtl', 'sdmx', 'vtlengine', 'Validation and Transformation Language'
 dependencies = [
     # PyPi dependencies
     "duckdb>=1.1,<1.2",
-    "pysdmx[xml]>=1.
+    "pysdmx[xml]>=1.4.0rc1,<2.0",
     # APT-supported dependencies
     "jsonschema>=3.2.0,<5.0",
     "sqlglot>=22.2.0,<23.0",
-    "antlr4-python3-runtime>=4.
+    "antlr4-python3-runtime>=4.13.2,<4.14",
     "pandas>=2.1.4,<3.0",
     "networkx>=2.8,<3.0",
     "numpy>=1.23.2,<2 ; python_version < '3.13'",
@@ -52,12 +52,13 @@ python = ">=3.9,<4.0"
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^8.4"
-pytest-cov = "^6.
+pytest-cov = "^6.2.1"
+pytest-xdist = "^3.8.0"
 line-profiler-pycharm = "^1.2.0"
-mypy = "1.
-pandas-stubs = "
-ruff = "^0.
-types-jsonschema = "4.
+mypy = "1.17.1"
+pandas-stubs = "2.2.2.240807"
+ruff = "^0.12.7"
+types-jsonschema = "4.25.0.20250720"
 
 [tool.poetry.group.docs.dependencies]
 sphinx = "^7.4.7"
@@ -75,8 +76,8 @@ lint.select = [
 # TODO: check S608 (duckdb querys)
 lint.ignore = ["B023", "B028", "B904", "C403", "D100", "D101", "D102", "D103", "D104", "D105",
                "D107", "D200", "D201", "D202", "D203", "D205", "D209", "D212", "D213", "D301",
-               "D400", "D401", "D404", "D411", "D413", "D415", "D419", "E203", "
-lint.exclude = ["*/Grammar/*"]
+               "D400", "D401", "D404", "D411", "D413", "D415", "D419", "E203", "S608"]
+lint.exclude = ["*/Grammar/*", "*/main.py"]
 
 [tool.ruff.lint.per-file-ignores]
 "tests/*" = ["S101", "PT006", "PT012", "PT013", "E501", "W605"]
{vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/API/_InternalApi.py
@@ -1,7 +1,7 @@
 import json
 import os
 from pathlib import Path
-from typing import Any, Dict, List, Literal, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, Tuple, Union
 
 import jsonschema
 import pandas as pd
@@ -22,7 +22,11 @@ from vtlengine.__extras_check import __check_s3_extra
 from vtlengine.AST import Assignment, DPRuleset, HRuleset, Operator, PersistentAssignment, Start
 from vtlengine.AST.ASTString import ASTString
 from vtlengine.DataTypes import SCALAR_TYPES
-from vtlengine.Exceptions import
+from vtlengine.Exceptions import (
+    InputValidationException,
+    SemanticError,
+    check_key,
+)
 from vtlengine.files.parser import _fill_dataset_empty_data, _validate_pandas
 from vtlengine.Model import (
     Component as VTL_Component,
@@ -44,11 +48,14 @@ with open(schema_path / "json_schema_2.1.json", "r") as file:
     schema = json.load(file)
 
 
-def _load_dataset_from_structure(
+def _load_dataset_from_structure(
+    structures: Dict[str, Any],
+) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     """
     Loads a dataset with the structure given.
     """
     datasets = {}
+    scalars = {}
 
     if "datasets" in structures:
         for dataset_json in structures["datasets"]:
@@ -110,8 +117,8 @@ def _load_dataset_from_structure(structures: Dict[str, Any]) -> Dict[str, Any]:
                 data_type=SCALAR_TYPES[scalar_json["type"]],
                 value=None,
             )
-
-    return datasets
+            scalars[scalar_name] = scalar
+    return datasets, scalars
 
 
 def _load_single_datapoint(datapoint: Union[str, Path]) -> Dict[str, Any]:
@@ -159,7 +166,9 @@
     return _load_single_datapoint(datapoints)
 
 
-def _load_datastructure_single(
+def _load_datastructure_single(
+    data_structure: Union[Dict[str, Any], Path],
+) -> Tuple[Dict[str, Dataset], Dict[str, Scalar]]:
     """
     Loads a single data structure.
     """
@@ -170,13 +179,15 @@ def _load_datastructure_single(data_structure: Union[Dict[str, Any], Path]) -> D
     if not data_structure.exists():
         raise Exception("Invalid datastructure. Input does not exist")
     if data_structure.is_dir():
-        datasets: Dict[str,
+        datasets: Dict[str, Dataset] = {}
+        scalars: Dict[str, Scalar] = {}
         for f in data_structure.iterdir():
             if f.suffix != ".json":
                 continue
-
-            datasets = {**datasets, **
-
+            ds, sc = _load_datastructure_single(f)
+            datasets = {**datasets, **ds}
+            scalars = {**scalars, **sc}
+        return datasets, scalars
     else:
         if data_structure.suffix != ".json":
             raise Exception("Invalid datastructure. Must have .json extension")
@@ -187,7 +198,7 @@ def _load_datastructure_single(data_structure: Union[Dict[str, Any], Path]) -> D
 
 def load_datasets(
     data_structure: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
-) -> Dict[str, Dataset]:
+) -> Tuple[Dict[str, Dataset], Dict[str, Scalar]]:
     """
     Loads multiple datasets.
 
@@ -205,21 +216,42 @@ def load_datasets(
     if isinstance(data_structure, dict):
         return _load_datastructure_single(data_structure)
     if isinstance(data_structure, list):
-        ds_structures: Dict[str,
+        ds_structures: Dict[str, Dataset] = {}
+        scalar_structures: Dict[str, Scalar] = {}
         for x in data_structure:
-
-            ds_structures = {**ds_structures, **
-
+            ds, sc = _load_datastructure_single(x)
+            ds_structures = {**ds_structures, **ds}  # Overwrite ds_structures dict.
+            scalar_structures = {**scalar_structures, **sc}  # Overwrite scalar_structures dict.
+        return ds_structures, scalar_structures
     return _load_datastructure_single(data_structure)
 
 
-def
+def _handle_scalars_values(
+    scalars: Dict[str, Scalar],
+    scalar_values: Optional[Dict[str, Optional[Union[int, str, bool, float]]]] = None,
+) -> None:
+    if scalar_values is None:
+        return
+    # Handling scalar values with the scalar dict
+    for name, value in scalar_values.items():
+        if name not in scalars:
+            raise Exception(f"Not found scalar {name} in datastructures")
+        # Casting value to scalar data type
+        scalars[name].value = scalars[name].data_type.cast(value)
+
+
+def load_datasets_with_data(
+    data_structures: Any,
+    datapoints: Optional[Any] = None,
+    scalar_values: Optional[Dict[str, Optional[Union[int, str, bool, float]]]] = None,
+) -> Any:
     """
     Loads the dataset structures and fills them with the data contained in the datapoints.
 
     Args:
         data_structures: Dict, Path or a List of dicts or Paths.
         datapoints: Dict, Path or a List of Paths.
+        scalar_values: Dict with the scalar values.
 
     Returns:
         A dict with the structure and a pandas dataframe with the data.
@@ -227,17 +259,18 @@ def load_datasets_with_data(data_structures: Any, datapoints: Optional[Any] = No
     Raises:
         Exception: If the Path is wrong or the file is invalid.
     """
-    datasets = load_datasets(data_structures)
+    datasets, scalars = load_datasets(data_structures)
     if datapoints is None:
         for dataset in datasets.values():
             if isinstance(dataset, Dataset):
                 _fill_dataset_empty_data(dataset)
-
+        _handle_scalars_values(scalars, scalar_values)
+        return datasets, scalars, None
     if isinstance(datapoints, dict):
         # Handling dictionary of Pandas Dataframes
         for dataset_name, data in datapoints.items():
             if dataset_name not in datasets:
-                raise Exception(f"Not found dataset {dataset_name}")
+                raise Exception(f"Not found dataset {dataset_name} in datastructures.")
             datasets[dataset_name].data = _validate_pandas(
                 datasets[dataset_name].components, data, dataset_name
             )
@@ -246,14 +279,17 @@ def load_datasets_with_data(data_structures: Any, datapoints: Optional[Any] = No
             datasets[dataset_name].data = pd.DataFrame(
                 columns=list(datasets[dataset_name].components.keys())
             )
-
+        _handle_scalars_values(scalars, scalar_values)
+        return datasets, scalars, None
     # Handling dictionary of paths
     dict_datapoints = _load_datapoints_path(datapoints)
     for dataset_name, _ in dict_datapoints.items():
         if dataset_name not in datasets:
-            raise Exception(f"Not found dataset {dataset_name}")
+            raise Exception(f"Not found dataset {dataset_name} in datastructures.")
+
+    _handle_scalars_values(scalars, scalar_values)
 
-    return datasets, dict_datapoints
+    return datasets, scalars, dict_datapoints
 
 
 def load_vtl(input: Union[str, Path]) -> str:
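Taken together, the hunks above thread a new `scalars` dictionary through the loaders: `load_datasets` and `load_datasets_with_data` now return the declared scalars alongside the datasets, and `_handle_scalars_values` casts user-supplied values onto them. A minimal sketch of that casting step, assuming a `Scalar(name=..., data_type=..., value=...)` constructor and an `"Integer"` key in `SCALAR_TYPES` (both plausible from these hunks, not verified against the full source):

```python
# Hypothetical sketch of the new scalar handling; names are illustrative.
from vtlengine.API._InternalApi import _handle_scalars_values
from vtlengine.DataTypes import SCALAR_TYPES
from vtlengine.Model import Scalar

# A scalar as _load_dataset_from_structure would build it (value starts as None).
scalars = {"sc_1": Scalar(name="sc_1", data_type=SCALAR_TYPES["Integer"], value=None)}

# Each supplied value is cast to the declared type via data_type.cast ...
_handle_scalars_values(scalars, {"sc_1": "3"})
print(scalars["sc_1"].value)  # 3, cast from the string "3"

# ... and a name never declared in the datastructures raises.
# _handle_scalars_values(scalars, {"sc_missing": 1})  # Exception
```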
@@ -362,8 +398,8 @@ def load_external_routines(input: Union[Dict[str, Any], Path, str]) -> Any:
 
 
 def _return_only_persistent_datasets(
-    datasets: Dict[str, Dataset], ast: Start
-) -> Dict[str, Dataset]:
+    datasets: Dict[str, Union[Dataset, Scalar]], ast: Start
+) -> Dict[str, Union[Dataset, Scalar]]:
     """
     Returns only the datasets with a persistent assignment.
     """
@@ -606,11 +642,9 @@ def _check_script(script: Union[str, TransformationScheme, Path]) -> str:
     Check if the TransformationScheme object is valid to generate a vtl script.
     """
     if not isinstance(script, (str, TransformationScheme, Path)):
-        raise
-            "Invalid script format. Input must be a string, TransformationScheme or Path object"
-        )
+        raise SemanticError("0-1-1-1", format_=type(script).__name__)
     if isinstance(script, TransformationScheme):
-        from pysdmx.toolkit.vtl
+        from pysdmx.toolkit.vtl import (
             generate_vtl_script,
         )
 
{vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/API/__init__.py
@@ -35,7 +35,7 @@ from vtlengine.files.output._time_period_representation import (
     format_time_period_external_representation,
 )
 from vtlengine.Interpreter import InterpreterAnalyzer
-from vtlengine.Model import Dataset
+from vtlengine.Model import Dataset, Scalar
 
 pd.options.mode.chained_assignment = None
 
@@ -180,7 +180,7 @@ def semantic_analysis(
     ast = create_ast(vtl)
 
     # Loading datasets
-
+    datasets, scalars = load_datasets(data_structures)
 
     # Handling of library items
     vd = None
@@ -192,9 +192,10 @@ def semantic_analysis(
 
     # Running the interpreter
     interpreter = InterpreterAnalyzer(
-        datasets=
+        datasets=datasets,
         value_domains=vd,
         external_routines=ext_routines,
+        scalars=scalars,
         only_semantic=True,
     )
     result = interpreter.visit(ast)
@@ -210,7 +211,8 @@ def run(
     time_period_output_format: str = "vtl",
     return_only_persistent: bool = True,
     output_folder: Optional[Union[str, Path]] = None,
-
+    scalar_values: Optional[Dict[str, Optional[Union[int, str, bool, float]]]] = None,
+) -> Dict[str, Union[Dataset, Scalar]]:
     """
     Run is the main function of the ``API``, which mission is to execute
     the vtl operation over the data.
@@ -276,6 +278,8 @@ def run(
 
         output_folder: Path or S3 URI to the output folder. (default: None)
 
+        scalar_values: Dict with the scalar values to be used in the VTL script. \
+
 
     Returns:
         The datasets are produced without data if the output folder is defined.
@@ -292,7 +296,9 @@ def run(
     ast = create_ast(vtl)
 
     # Loading datasets and datapoints
-    datasets, path_dict = load_datasets_with_data(
+    datasets, scalars, path_dict = load_datasets_with_data(
+        data_structures, datapoints, scalar_values
+    )
 
     # Handling of library items
     vd = None
@@ -322,13 +328,15 @@ def run(
         output_path=output_folder,
         time_period_representation=time_period_representation,
         return_only_persistent=return_only_persistent,
+        scalars=scalars,
     )
     result = interpreter.visit(ast)
 
     # Applying time period output format
     if output_folder is None:
-        for
-
+        for obj in result.values():
+            if isinstance(obj, (Dataset, Scalar)):
+                format_time_period_external_representation(obj, time_period_representation)
 
     # Returning only persistent datasets
     if return_only_persistent:
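With these hunks, `run` accepts a `scalar_values` mapping and can return `Scalar` results next to `Dataset` ones. A hedged usage sketch; the payload below follows vtlengine's JSON datastructure layout, and all names and keys are illustrative assumptions, not taken from this diff:

```python
import pandas as pd
from vtlengine import run

# Illustrative structures: one dataset plus one declared input scalar.
data_structures = {
    "datasets": [
        {
            "name": "DS_1",
            "DataStructure": [
                {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
            ],
        }
    ],
    "scalars": [{"name": "sc_threshold", "type": "Number"}],
}
datapoints = {"DS_1": pd.DataFrame({"Id_1": [1, 2], "Me_1": [5.0, 20.0]})}

# The new scalar_values argument binds a value to the declared scalar
# before the interpreter runs.
result = run(
    script="DS_r <- DS_1 [ filter Me_1 > sc_threshold ];",
    data_structures=data_structures,
    datapoints=datapoints,
    scalar_values={"sc_threshold": 10.0},
)
```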
@@ -345,7 +353,7 @@ def run_sdmx(  # noqa: C901
     time_period_output_format: str = "vtl",
     return_only_persistent: bool = True,
     output_folder: Optional[Union[str, Path]] = None,
-) -> Dict[str, Dataset]:
+) -> Dict[str, Union[Dataset, Scalar]]:
     """
     Executes a VTL script using a list of pysdmx `PandasDataset` objects.
 
@@ -403,8 +411,16 @@ def run_sdmx(  # noqa: C901
     mapping_dict = {}
     input_names = _extract_input_datasets(script)
 
-
+    if not isinstance(datasets, (list, set)) or any(
+        not isinstance(ds, PandasDataset) for ds in datasets
+    ):
+        type_ = type(datasets).__name__
+        if isinstance(datasets, (list, set)):
+            object_typing = {type(o).__name__ for o in datasets}
+            type_ = f"{type_}[{', '.join(object_typing)}]"
+        raise SemanticError("0-1-3-7", type_=type_)
 
+    # Mapping handling
     if mappings is None:
         if len(datasets) != 1:
             raise SemanticError("0-1-3-3")
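The new guard in `run_sdmx` also builds a readable description of the offending input type for the `0-1-3-7` error. The formatting logic in isolation (plain Python, mirroring the lines above):

```python
# Toy rendering of the type description used in the SemanticError message.
datasets = ["not-a-PandasDataset", 42]  # invalid input: wrong element types
type_ = type(datasets).__name__
if isinstance(datasets, (list, set)):
    object_typing = {type(o).__name__ for o in datasets}
    type_ = f"{type_}[{', '.join(object_typing)}]"
print(type_)  # e.g. "list[str, int]" (set iteration order may vary)
```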
{vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/ASTConstructorModules/Expr.py
@@ -840,8 +840,8 @@ class Expr(VtlVisitor):
                 Parser.DayOfYearAtomContext,
                 Parser.DayToYearAtomContext,
                 Parser.DayToMonthAtomContext,
-                Parser.
-                Parser.
+                Parser.YearToDayAtomContext,
+                Parser.MonthToDayAtomContext,
             ),
         ):
             return self.visitTimeUnaryAtom(ctx)
@@ -1901,7 +1901,10 @@ class Expr(VtlVisitor):
 
         left_node = Terminals().visitVarID(ctx_list[0])
         op_node = ctx_list[1].getSymbol().text
-
+        if isinstance(ctx_list[2], Parser.ScalarItemContext):
+            right_node = Terminals().visitScalarItem(ctx_list[2])
+        else:
+            right_node = Terminals().visitVarID(ctx_list[2])
         return BinOp(left=left_node, op=op_node, right=right_node, **extract_token_info(ctx))
 
     def visitOptionalExpr(self, ctx: Parser.OptionalExprContext):
{vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/DAG/__init__.py
@@ -32,8 +32,8 @@ from vtlengine.AST import (
     VarID,
 )
 from vtlengine.AST.ASTTemplate import ASTTemplate
-from vtlengine.AST.DAG._words import DELETE, GLOBAL, INPUTS, INSERT, OUTPUTS, PERSISTENT
-from vtlengine.AST.Grammar.tokens import AS, MEMBERSHIP, TO
+from vtlengine.AST.DAG._words import DELETE, GLOBAL, INPUTS, INSERT, OUTPUTS, PERSISTENT, UNKNOWN
+from vtlengine.AST.Grammar.tokens import AS, DROP, KEEP, MEMBERSHIP, RENAME, TO
 from vtlengine.Exceptions import SemanticError
 
 
@@ -61,6 +61,8 @@ class DAGAnalyzer(ASTTemplate):
     inputs: Optional[list] = None
     outputs: Optional[list] = None
     persistent: Optional[list] = None
+    unknown_variables: Optional[list] = None
+    unknown_variables_statement: Optional[list] = None
 
     def __post_init__(self):
         self.dependencies = {}
@@ -72,6 +74,8 @@ class DAGAnalyzer(ASTTemplate):
         self.outputs = []
         self.persistent = []
         self.alias = []
+        self.unknown_variables = []
+        self.unknown_variables_statement = []
 
     @classmethod
     def ds_structure(cls, ast: AST):
@@ -176,7 +180,7 @@ class DAGAnalyzer(ASTTemplate):
         """ """
         # For each vertex
         for key, statement in self.dependencies.items():
-            output = statement[OUTPUTS] + statement[PERSISTENT]
+            output = statement[OUTPUTS] + statement[PERSISTENT] + statement[UNKNOWN]
             # If the statement has no := or -> symbol there is no vertex to add.
             if len(output) != 0:
                 self.vertex[key] = output[0]
@@ -245,12 +249,15 @@ class DAGAnalyzer(ASTTemplate):
         inputs = list(set(self.inputs))
         outputs = list(set(self.outputs))
         persistent = list(set(self.persistent))
+        unknown = list(set(self.unknown_variables_statement))
 
         # Remove inputs that are outputs of some statement.
         inputsF = [inputf for inputf in inputs if inputf not in outputs]
 
-        dict_ = {INPUTS: inputsF, OUTPUTS: outputs, PERSISTENT: persistent}
-
+        dict_ = {INPUTS: inputsF, OUTPUTS: outputs, PERSISTENT: persistent, UNKNOWN: unknown}
+        for variable in self.unknown_variables_statement:
+            if variable not in self.unknown_variables:
+                self.unknown_variables.append(variable)
         return dict_
 
         """______________________________________________________________________________________
@@ -293,6 +300,19 @@ class DAGAnalyzer(ASTTemplate):
         self.inputs = []
         self.outputs = []
         self.persistent = []
+        self.unknown_variables_statement = []
+        aux = copy.copy(self.unknown_variables)
+        for variable in aux:
+            for _number_of_statement, dependency in self.dependencies.items():
+                if variable in dependency[OUTPUTS]:
+                    if variable in self.unknown_variables:
+                        self.unknown_variables.remove(variable)
+                    for _number_of_statement, dependency in self.dependencies.items():
+                        if variable in dependency[UNKNOWN]:
+                            dependency[UNKNOWN].remove(variable)
+                            dependency[INPUTS].append(variable)
+                    if variable not in self.inputs:
+                        self.inputs.append(variable)
 
     def visit_Assignment(self, node: Assignment) -> None:
         if self.isFirstAssignment:
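The resolution step reconstructed above reads: a name referenced inside a clause is parked as unknown until some statement is seen to output it, at which point it is reclassified as an ordinary input so the DAG gains the corresponding edge. A toy model of that rule (plain Python, not vtlengine code; all names illustrative):

```python
# Toy model of the unknown-variable resolution added to DAGAnalyzer.
INPUTS, OUTPUTS, UNKNOWN = "input", "output", "unknown"

dependencies = {
    1: {INPUTS: ["DS_1"], OUTPUTS: ["sc_threshold"], UNKNOWN: []},
    2: {INPUTS: ["DS_1"], OUTPUTS: ["DS_r"], UNKNOWN: ["sc_threshold"]},
}

produced = {name for dep in dependencies.values() for name in dep[OUTPUTS]}
for dep in dependencies.values():
    for name in list(dep[UNKNOWN]):
        if name in produced:  # another statement outputs this name
            dep[UNKNOWN].remove(name)
            dep[INPUTS].append(name)  # statement 1 -> statement 2 becomes an edge

print(dependencies[2][INPUTS])  # ['DS_1', 'sc_threshold']
```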
@@ -310,6 +330,8 @@ class DAGAnalyzer(ASTTemplate):
 
     def visit_RegularAggregation(self, node: RegularAggregation) -> None:
         self.visit(node.dataset)
+        if node.op in [KEEP, DROP, RENAME]:
+            return
         for child in node.children:
             self.isFromRegularAggregation = True
             self.visit(child)
@@ -331,6 +353,13 @@ class DAGAnalyzer(ASTTemplate):
     def visit_VarID(self, node: VarID) -> None:
         if (not self.isFromRegularAggregation or self.isDataset) and node.value not in self.alias:
             self.inputs.append(node.value)
+        elif (
+            self.isFromRegularAggregation
+            and node.value not in self.alias
+            and not self.isDataset
+            and node.value not in self.unknown_variables_statement
+        ):
+            self.unknown_variables_statement.append(node.value)
 
     def visit_Identifier(self, node: Identifier) -> None:
         if node.kind == "DatasetID" and node.value not in self.alias:
{vtlengine-1.1.1 → vtlengine-1.2.1rc1}/src/vtlengine/AST/Grammar/Vtl.g4
@@ -219,11 +219,11 @@ timeOperators:
     | YEAR_OP LPAREN expr RPAREN # yearAtom
     | MONTH_OP LPAREN expr RPAREN # monthAtom
     | DAYOFMONTH LPAREN expr RPAREN # dayOfMonthAtom
-    | DAYOFYEAR LPAREN expr RPAREN #
+    | DAYOFYEAR LPAREN expr RPAREN # dayOfYearAtom
     | DAYTOYEAR LPAREN expr RPAREN # dayToYearAtom
     | DAYTOMONTH LPAREN expr RPAREN # dayToMonthAtom
-    | YEARTODAY LPAREN expr RPAREN #
-    | MONTHTODAY LPAREN expr RPAREN #
+    | YEARTODAY LPAREN expr RPAREN # yearToDayAtom
+    | MONTHTODAY LPAREN expr RPAREN # monthToDayAtom
     ;
@@ -238,11 +238,11 @@ timeOperatorsComponent:
     | YEAR_OP LPAREN exprComponent RPAREN # yearAtomComponent
     | MONTH_OP LPAREN exprComponent RPAREN # monthAtomComponent
    | DAYOFMONTH LPAREN exprComponent RPAREN # dayOfMonthAtomComponent
-    | DAYOFYEAR LPAREN exprComponent RPAREN #
+    | DAYOFYEAR LPAREN exprComponent RPAREN # dayOfYearAtomComponent
     | DAYTOYEAR LPAREN exprComponent RPAREN # dayToYearAtomComponent
     | DAYTOMONTH LPAREN exprComponent RPAREN # dayToMonthAtomComponent
-    | YEARTODAY LPAREN exprComponent RPAREN #
-    | MONTHTODAY LPAREN exprComponent RPAREN #
+    | YEARTODAY LPAREN exprComponent RPAREN # yearToDayAtomComponent
+    | MONTHTODAY LPAREN exprComponent RPAREN # monthToDayAtomComponent
     ;
@@ -363,7 +363,7 @@ calcClauseItem:
 
 /*SUBSPACE CLAUSE*/
 subspaceClauseItem:
-    componentID EQ scalarItem
+    componentID EQ (scalarItem | varID)
     ;
 
 scalarItem:
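The relaxed `subspaceClauseItem` rule, together with the `Expr.py` change at line 1901 above, lets the right-hand side of a `sub` clause be a variable reference rather than only a scalar literal. Hypothetical VTL lines contrasting the two forms (dataset, component, and scalar names are illustrative):

```python
# Accepted before and after: subspace member fixed by a scalar literal.
script_literal = "DS_r <- DS_1 [ sub Id_1 = 1 ];"

# Newly parseable: subspace member taken from a variable, e.g. an input
# scalar supplied through the new scalar_values mechanism.
script_variable = "DS_r <- DS_1 [ sub Id_1 = sc_1 ];"
```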