vtlengine 1.0.3rc3.tar.gz → 1.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of vtlengine might be problematic.
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/PKG-INFO +21 -17
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/pyproject.toml +51 -28
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/API/_InternalApi.py +288 -61
- vtlengine-1.1/src/vtlengine/API/__init__.py +507 -0
- vtlengine-1.1/src/vtlengine/API/data/schema/json_schema_2.1.json +116 -0
- vtlengine-1.1/src/vtlengine/AST/ASTComment.py +56 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/ASTConstructor.py +76 -22
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/ASTConstructorModules/Expr.py +238 -120
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/ASTConstructorModules/ExprComponents.py +126 -61
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/ASTConstructorModules/Terminals.py +97 -42
- vtlengine-1.1/src/vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/ASTEncoders.py +5 -1
- vtlengine-1.1/src/vtlengine/AST/ASTString.py +608 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/ASTTemplate.py +28 -2
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/DAG/__init__.py +10 -4
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/Grammar/lexer.py +0 -1
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/Grammar/parser.py +185 -440
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/VtlVisitor.py +0 -1
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/__init__.py +127 -14
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/DataTypes/TimeHandling.py +50 -15
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/DataTypes/__init__.py +79 -7
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Exceptions/__init__.py +3 -5
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Exceptions/messages.py +74 -105
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Interpreter/__init__.py +136 -46
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Model/__init__.py +14 -11
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/Aggregation.py +17 -9
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/Analytic.py +64 -20
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/Assignment.py +0 -1
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/CastOperator.py +44 -44
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/Clause.py +16 -10
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/Comparison.py +20 -12
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/Conditional.py +47 -15
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/General.py +9 -4
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/HROperators.py +4 -14
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/Join.py +15 -14
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/Numeric.py +32 -26
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/RoleSetter.py +6 -2
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/Set.py +12 -8
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/String.py +9 -9
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/Time.py +145 -124
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/Validation.py +10 -4
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/__init__.py +56 -69
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Utils/__init__.py +55 -1
- vtlengine-1.1/src/vtlengine/__extras_check.py +17 -0
- vtlengine-1.1/src/vtlengine/__init__.py +3 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/files/output/__init__.py +2 -1
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/files/output/_time_period_representation.py +2 -1
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/files/parser/__init__.py +52 -46
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/files/parser/_time_checking.py +4 -4
- vtlengine-1.0.3rc3/src/vtlengine/API/__init__.py +0 -309
- vtlengine-1.0.3rc3/src/vtlengine/DataTypes/NumericTypesHandling.py +0 -38
- vtlengine-1.0.3rc3/src/vtlengine/__init__.py +0 -3
- vtlengine-1.0.3rc3/src/vtlengine/files/__init__.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/LICENSE.md +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/README.md +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/ASTDataExchange.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/ASTVisitor.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/DAG/_words.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/Grammar/Vtl.g4 +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/Grammar/VtlTokens.g4 +0 -0
- {vtlengine-1.0.3rc3/src/vtlengine/AST/ASTConstructorModules → vtlengine-1.1/src/vtlengine/AST/Grammar}/__init__.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/AST/Grammar/tokens.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/Operators/Boolean.py +0 -0
- {vtlengine-1.0.3rc3/src/vtlengine/AST/Grammar → vtlengine-1.1/src/vtlengine/files}/__init__.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1}/src/vtlengine/files/parser/_rfc_dialect.py +0 -0
```diff
--- vtlengine-1.0.3rc3/PKG-INFO
+++ vtlengine-1.1/PKG-INFO
@@ -1,31 +1,35 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.3
 Name: vtlengine
-Version: 1.0.3rc3
+Version: 1.1
 Summary: Run and Validate VTL Scripts
 License: AGPL-3.0
 Keywords: vtl,sdmx,vtlengine,Validation and Transformation Language
 Author: MeaningfulData
 Author-email: info@meaningfuldata.eu
-…
+Maintainer: Francisco Javier Hernandez del Caño
+Maintainer-email: javier.hernandez@meaningfuldata.eu
+Requires-Python: >=3.9
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Information Technology
 Classifier: Intended Audience :: Science/Research
-Classifier: License :: OSI Approved :: GNU Affero General Public License v3
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
 Classifier: Typing :: Typed
-…
-…
-Requires-Dist: …
-Requires-Dist: …
-Requires-Dist: …
-Requires-Dist: …
-Requires-Dist: …
-Requires-Dist: …
+Provides-Extra: all
+Provides-Extra: s3
+Requires-Dist: antlr4-python3-runtime (>=4.9.2,<4.10)
+Requires-Dist: duckdb (>=1.1,<1.2)
+Requires-Dist: fsspec (>=2022.11.0,<2023.0) ; extra == "all"
+Requires-Dist: fsspec (>=2022.11.0,<2023.0) ; extra == "s3"
+Requires-Dist: jsonschema (>=3.2.0,<5.0)
+Requires-Dist: networkx (>=2.8,<3.0)
+Requires-Dist: numpy (>=1.23.2,<2) ; python_version < "3.13"
+Requires-Dist: numpy (>=2.1.0) ; python_version >= "3.13"
+Requires-Dist: pandas (>=2.1.4,<2.2) ; python_version < "3.13"
+Requires-Dist: pandas (>=2.2,<3.0) ; python_version >= "3.13"
+Requires-Dist: pysdmx[xml] (>=1.3.0,<2.0)
+Requires-Dist: s3fs (>=2022.11.0,<2023.0) ; extra == "all"
+Requires-Dist: s3fs (>=2022.11.0,<2023.0) ; extra == "s3"
+Requires-Dist: sqlglot (>=22.2.0,<23.0)
 Project-URL: Authors, https://github.com/Meaningful-Data/vtlengine/graphs/contributors
 Project-URL: Documentation, https://docs.vtlengine.meaningfuldata.eu
 Project-URL: IssueTracker, https://github.com/Meaningful-Data/vtlengine/issues
```
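The 1.1 metadata introduces `s3` and `all` extras and moves `s3fs`/`fsspec` behind them, where 1.0.3rc3 depended on `s3fs` unconditionally. The new `src/vtlengine/__extras_check.py` module (17 added lines, contents not shown in this diff) guards the S3 code paths. A minimal sketch of what such a guard could look like, assuming it merely probes for the optional packages:

```python
# Hypothetical sketch of an extras guard in the spirit of vtlengine's new
# __check_s3_extra; the real 17-line module is not shown in this diff.
def __check_s3_extra() -> None:
    """Fail fast when S3 paths are used without the optional 's3' extra."""
    try:
        import fsspec  # noqa: F401
        import s3fs  # noqa: F401
    except ImportError as err:
        raise ImportError(
            "S3 support requires the optional dependencies: "
            "pip install 'vtlengine[s3]'"
        ) from err
```

Installing with `pip install 'vtlengine[s3]'` (or `[all]`) pulls in both optional packages.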
```diff
--- vtlengine-1.0.3rc3/pyproject.toml
+++ vtlengine-1.1/pyproject.toml
@@ -1,11 +1,18 @@
-[tool.poetry]
+[project]
 name = "vtlengine"
-version = "1.0.3rc3"
+version = "1.1"
 description = "Run and Validate VTL Scripts"
-authors = ["MeaningfulData <info@meaningfuldata.eu>"]
 license = "AGPL-3.0"
 readme = "README.md"
-
+requires-python = ">=3.9"
+authors = [
+    {name = "MeaningfulData", email = "info@meaningfuldata.eu"},
+]
+maintainers = [
+    {name = "Francisco Javier Hernandez del Caño", email = "javier.hernandez@meaningfuldata.eu"},
+    {name = "Alberto Hernandez del Caño", email = "alberto.hernandez@meaningfuldata.eu"},
+    {name = "Mateo de Lorenzo Argeles", email = "mateo.delorenzo@meaningfuldata.eu"}
+]
 classifiers = [
     "Development Status :: 5 - Production/Stable",
     "Intended Audience :: Developers",
@@ -13,10 +20,28 @@ classifiers = [
     "Intended Audience :: Science/Research",
     "Typing :: Typed"
 ]
-
 keywords = ['vtl', 'sdmx', 'vtlengine', 'Validation and Transformation Language']
 
-[tool.poetry.urls]
+dependencies = [
+    # PyPi dependencies
+    "duckdb>=1.1,<1.2",
+    "pysdmx[xml]>=1.3.0,<2.0",
+    # APT-supported dependencies
+    "jsonschema>=3.2.0,<5.0",
+    "sqlglot>=22.2.0,<23.0",
+    "antlr4-python3-runtime>=4.9.2,<4.10",
+    "pandas>=2.1.4,<2.2 ; python_version < '3.13'",
+    "pandas>=2.2,<3.0 ; python_version >= '3.13'",
+    "networkx>=2.8,<3.0",
+    "numpy>=1.23.2,<2 ; python_version < '3.13'",
+    "numpy>=2.1.0; python_version >= '3.13'",
+]
+
+[project.optional-dependencies]
+s3 = ["s3fs>=2022.11.0,<2023.0", "fsspec>=2022.11.0,<2023.0"]
+all = ["s3fs>=2022.11.0,<2023.0", "fsspec>=2022.11.0,<2023.0"]
+
+[project.urls]
 Repository = 'https://github.com/Meaningful-Data/vtlengine'
 Documentation = 'https://docs.vtlengine.meaningfuldata.eu'
 MeaningfulData = 'https://www.meaningfuldata.eu/'
@@ -24,31 +49,23 @@ IssueTracker = 'https://github.com/Meaningful-Data/vtlengine/issues'
 Authors = 'https://github.com/Meaningful-Data/vtlengine/graphs/contributors'
 
 [tool.poetry.dependencies]
-python = "…"
-# PyPi dependencies
-duckdb = "^1.1.1"
-#numba = "^0.60.0"
-s3fs = "^2024.9.0"
+python = ">=3.9,<4.0"
 
-…
-…
-…
-numexpr = "^2.9.0"
-pandas = "^2.1.4"
-bottleneck = "^1.3.4"
-sqlglot = "^22.2.0"
-
-[tool.poetry.dev-dependencies]
-pytest = "^7.3"
-pytest-cov = "^5.0.0"
+[tool.poetry.group.dev.dependencies]
+pytest = "^8.3"
+pytest-cov = "^6.1.1"
 line-profiler-pycharm = "^1.2.0"
-…
-…
-…
-…
-…
+mypy = "1.15.0"
+pandas-stubs = "^2.1.4.231227"
+ruff = "^0.11.11"
+types-jsonschema = "4.23.0.20250516"
+
+[tool.poetry.group.docs.dependencies]
+sphinx = "^7.4.7"
+sphinx-rtd-theme = "^3.0.2"
 toml = "^0.10.2"
-
+
+
 
 [tool.ruff]
 line-length = 100
@@ -84,6 +101,12 @@ enable_error_code = [
 ]
 warn_return_any = false
 
+[tool.pytest.ini_options]
+addopts = "--strict-markers"
+markers = [
+    "input_path: directory where tests data files are stored"
+]
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
```
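The new `[tool.pytest.ini_options]` table registers a custom `input_path` marker and enables `--strict-markers`, so any marker not declared there fails collection instead of being silently ignored. A hypothetical test showing how such a marker is consumed (the test name and path below are illustrative, not from the diff):

```python
# Hypothetical usage of the registered "input_path" marker; with
# --strict-markers enabled, misspelled or unregistered markers error out.
import pytest


@pytest.mark.input_path("tests/data/DataSet/input")  # illustrative path
def test_reads_marker(request: pytest.FixtureRequest) -> None:
    marker = request.node.get_closest_marker("input_path")
    assert marker is not None
    assert marker.args[0].endswith("input")
```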
```diff
--- vtlengine-1.0.3rc3/src/vtlengine/API/_InternalApi.py
+++ vtlengine-1.1/src/vtlengine/API/_InternalApi.py
@@ -1,17 +1,33 @@
 import json
 import os
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, Union
 
+import jsonschema
 import pandas as pd
-from …
+from pysdmx.model.dataflow import Component as SDMXComponent
+from pysdmx.model.dataflow import DataStructureDefinition, Schema
+from pysdmx.model.dataflow import Role as SDMX_Role
+from pysdmx.model.vtl import (
+    Ruleset,
+    RulesetScheme,
+    Transformation,
+    TransformationScheme,
+    UserDefinedOperator,
+    UserDefinedOperatorScheme,
+)
 
-from vtlengine…
+from vtlengine import AST as AST
+from vtlengine.__extras_check import __check_s3_extra
+from vtlengine.AST import Assignment, DPRuleset, HRuleset, Operator, PersistentAssignment, Start
+from vtlengine.AST.ASTString import ASTString
 from vtlengine.DataTypes import SCALAR_TYPES
-from vtlengine.Exceptions import check_key
+from vtlengine.Exceptions import InputValidationException, check_key
 from vtlengine.files.parser import _fill_dataset_empty_data, _validate_pandas
 from vtlengine.Model import (
-    Component,
+    Component as VTL_Component,
+)
+from vtlengine.Model import (
     Dataset,
     ExternalRoutine,
     Role,
@@ -19,15 +35,13 @@ from vtlengine.Model import (
     Scalar,
     ValueDomain,
 )
+from vtlengine.Utils import VTL_DTYPES_MAPPING, VTL_ROLE_MAPPING
 
 base_path = Path(__file__).parent
-…
-…
-…
-…
-filepath_csv = base_path / "data" / "DataSet" / "input"
-filepath_out_json = base_path / "data" / "DataStructure" / "output"
-filepath_out_csv = base_path / "data" / "DataSet" / "output"
+schema_path = base_path / "data" / "schema"
+sdmx_csv_path = base_path / "data" / "sdmx_csv"
+with open(schema_path / "json_schema_2.1.json", "r") as file:
+    schema = json.load(file)
 
 
 def _load_dataset_from_structure(structures: Dict[str, Any]) -> Dict[str, Any]:
@@ -41,22 +55,60 @@ def _load_dataset_from_structure(structures: Dict[str, Any]) -> Dict[str, Any]:
             dataset_name = dataset_json["name"]
             components = {}
 
-…
-…
-…
-…
-…
-…
-…
-…
-…
+            if "structure" in dataset_json:
+                structure_name = dataset_json["structure"]
+                structure_json = None
+                for s in structures["structures"]:
+                    if s["name"] == structure_name:
+                        structure_json = s
+                if structure_json is None:
+                    raise InputValidationException(code="0-3-1-1", message="Structure not found.")
+                try:
+                    jsonschema.validate(instance=structure_json, schema=schema)
+                except jsonschema.exceptions.ValidationError as e:
+                    raise InputValidationException(code="0-3-1-1", message=e.message)
+
+                for component in structure_json["components"]:
+                    check_key("data_type", SCALAR_TYPES.keys(), component["data_type"])
+                    if component["role"] == "ViralAttribute":
+                        component["role"] = "Attribute"
+
+                    check_key("role", Role_keys, component["role"])
+
+                    if "nullable" not in component:
+                        if Role(component["role"]) == Role.IDENTIFIER:
+                            component["nullable"] = False
+                        elif Role(component["role"]) in (Role.MEASURE, Role.ATTRIBUTE):
+                            component["nullable"] = True
+                        else:
+                            component["nullable"] = False
+
+                    components[component["name"]] = VTL_Component(
+                        name=component["name"],
+                        data_type=SCALAR_TYPES[component["data_type"]],
+                        role=Role(component["role"]),
+                        nullable=component["nullable"],
+                    )
+
+            if "DataStructure" in dataset_json:
+                for component in dataset_json["DataStructure"]:
+                    check_key("data_type", SCALAR_TYPES.keys(), component["type"])
+                    check_key("role", Role_keys, component["role"])
+                    components[component["name"]] = VTL_Component(
+                        name=component["name"],
+                        data_type=SCALAR_TYPES[component["type"]],
+                        role=Role(component["role"]),
+                        nullable=component["nullable"],
+                    )
 
             datasets[dataset_name] = Dataset(name=dataset_name, components=components, data=None)
     if "scalars" in structures:
         for scalar_json in structures["scalars"]:
             scalar_name = scalar_json["name"]
             scalar = Scalar(
-                name=scalar_name, …
+                name=scalar_name,
+                data_type=SCALAR_TYPES[scalar_json["type"]],
+                value=None,
             )
             datasets[scalar_name] = scalar  # type: ignore[assignment]
     return datasets
```
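The rewritten `_load_dataset_from_structure` accepts both the legacy inline `"DataStructure"` layout and the new `"structure"` reference, which is resolved against `structures["structures"]` and validated with the bundled `json_schema_2.1.json`. An illustrative input dict for the new layout (component names and types are examples; the bundled schema is authoritative):

```python
# Illustrative structure dict for the new "structure"-reference layout;
# names and types are examples, the bundled JSON schema is authoritative.
structures = {
    "datasets": [{"name": "DS_1", "structure": "STR_1"}],
    "structures": [
        {
            "name": "STR_1",
            "components": [
                {"name": "Id_1", "role": "Identifier", "data_type": "Integer"},
                {"name": "Me_1", "role": "Measure", "data_type": "Number"},
                # "nullable" may be omitted: per the code above, identifiers
                # default to False, measures and attributes to True.
            ],
        }
    ],
}
```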
```diff
--- vtlengine-1.0.3rc3/src/vtlengine/API/_InternalApi.py
+++ vtlengine-1.1/src/vtlengine/API/_InternalApi.py
@@ -70,38 +122,16 @@ def _load_single_datapoint(datapoint: Union[str, Path]) -> Dict[str, Any]:
         raise Exception("Invalid datapoint. Input must be a Path or an S3 URI")
     if isinstance(datapoint, str):
         if "s3://" in datapoint:
-…
-            s3fs_obj = S3FileSystem()
-
-            # Check if the S3 URI is valid
-            if not s3fs_obj.exists(datapoint):
-                raise Exception(
-                    f"Invalid datapoint. S3 URI does not exist or it is not accessible: {datapoint}"
-                )
-
-            # Check if the S3 URI is a directory
-            if s3fs_obj.isdir(datapoint):
-                datapoints: Dict[str, Any] = {}
-                for f in s3fs_obj.ls(datapoint):
-                    if f.endswith(".csv"):
-                        dataset_name = f.split("/")[-1].removesuffix(".csv")
-                        dict_data = {dataset_name: f"s3://{f}"}
-                        datapoints = {**datapoints, **dict_data}
-                return datapoints
-
-            # Check if the S3 URI is a csv file
-            if s3fs_obj.isfile(datapoint) and not datapoint.endswith(".csv"):
-                raise Exception(f"Invalid datapoint. S3 URI must refer to a csv file: {datapoint}")
+            __check_s3_extra()
             dataset_name = datapoint.split("/")[-1].removesuffix(".csv")
             dict_data = {dataset_name: datapoint}
             return dict_data
-
     try:
         datapoint = Path(datapoint)
     except Exception:
         raise Exception("Invalid datapoint. Input must refer to a Path or an S3 URI")
     if datapoint.is_dir():
-        datapoints = {}
+        datapoints: Dict[str, Any] = {}
         for f in datapoint.iterdir():
             if f.suffix != ".csv":
                 continue
@@ -115,7 +145,7 @@ def _load_single_datapoint(datapoint: Union[str, Path]) -> Dict[str, Any]:
 
 
 def _load_datapoints_path(
-    datapoints: Union[Path, str, List[Union[str, Path]]]
+    datapoints: Union[Path, str, List[Union[str, Path]]],
 ) -> Dict[str, Dataset]:
     """
     Returns a dict with the data given from a Path.
@@ -156,7 +186,7 @@ def _load_datastructure_single(data_structure: Union[Dict[str, Any], Path]) -> D
 
 
 def load_datasets(
-    data_structure: Union[Dict[str, Any], Path, List[…
+    data_structure: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
 ) -> Dict[str, Dataset]:
     """
     Loads multiple datasets.
```
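Note that 1.1 no longer probes an S3 URI up front with `S3FileSystem`; it only checks that the extra is installed and derives the dataset name from the URI, deferring access errors to read time. The accepted shapes for the datapoint argument, per the signatures above (concrete paths are illustrative):

```python
# Accepted shapes for the datapoints argument, per the signatures above;
# the concrete paths are illustrative.
from pathlib import Path

single = Path("data/DS_1.csv")            # one CSV file
folder = Path("data/")                    # every *.csv inside a directory
s3_uri = "s3://bucket/prefix/DS_1.csv"    # requires the 's3' extra
many = [single, s3_uri]                   # a mixed list is also allowed
# In each case the dataset name is the file stem, e.g. "DS_1".
```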
```diff
--- vtlengine-1.0.3rc3/src/vtlengine/API/_InternalApi.py
+++ vtlengine-1.1/src/vtlengine/API/_InternalApi.py
@@ -365,25 +395,222 @@ def _check_output_folder(output_folder: Union[str, Path]) -> None:
     """
     if isinstance(output_folder, str):
         if "s3://" in output_folder:
-…
-…
-…
-            try:
-                s3fs_obj.mkdir(output_folder)
-            except Exception:
-                raise Exception(
-                    f"Invalid output folder. S3 URI is invalid or "
-                    f"it is not accessible: {output_folder}"
-                )
+            __check_s3_extra()
+            if not output_folder.endswith("/"):
+                raise ValueError("Output folder must be a Path or S3 URI to a directory")
             return
     try:
         output_folder = Path(output_folder)
     except Exception:
-        raise …
+        raise ValueError("Output folder must be a Path or S3 URI to a directory")
 
     if not isinstance(output_folder, Path):
-        raise …
+        raise ValueError("Output folder must be a Path or S3 URI to a directory")
     if not output_folder.exists():
         if output_folder.suffix != "":
-            raise …
+            raise ValueError("Output folder must be a Path or S3 URI to a directory")
         os.mkdir(output_folder)
+
+
+def to_vtl_json(dsd: Union[DataStructureDefinition, Schema], dataset_name: str) -> Dict[str, Any]:
+    """
+    Converts a pysdmx `DataStructureDefinition` or `Schema` into a VTL-compatible JSON
+    representation.
+
+    This function extracts and transforms the components (dimensions, measures, and attributes)
+    from the given SDMX data structure and maps them into a dictionary format that conforms
+    to the expected VTL data structure json schema.
+
+    Args:
+        dsd: An instance of `DataStructureDefinition` or `Schema` from the `pysdmx` model.
+        dataset_name: The name of the resulting VTL dataset.
+
+    Returns:
+        A dictionary representing the dataset in VTL format, with keys for dataset name and its
+        components, including their name, role, data type, and nullability.
+    """
+    components = []
+    NAME = "name"
+    ROLE = "role"
+    TYPE = "type"
+    NULLABLE = "nullable"
+
+    _components: List[SDMXComponent] = []
+    _components.extend(dsd.components.dimensions)
+    _components.extend(dsd.components.measures)
+    _components.extend(dsd.components.attributes)
+
+    for c in _components:
+        _type = VTL_DTYPES_MAPPING[c.dtype]
+        _nullability = c.role != SDMX_Role.DIMENSION
+        _role = VTL_ROLE_MAPPING[c.role]
+
+        component = {
+            NAME: c.id,
+            ROLE: _role,
+            TYPE: _type,
+            NULLABLE: _nullability,
+        }
+
+        components.append(component)
+
+    result = {"datasets": [{"name": dataset_name, "DataStructure": components}]}
+
+    return result
+
+
+def __generate_transformation(
+    child: Union[Assignment, PersistentAssignment], is_persistent: bool, count: int
+) -> Transformation:
+    expression = ASTString().render(ast=child.right)
+    result = child.left.value  # type: ignore[attr-defined]
+    return Transformation(
+        id=f"T{count}",
+        expression=expression,
+        is_persistent=is_persistent,
+        result=result,
+        name=f"Transformation {result}",
+    )
+
+
+def __generate_udo(child: Operator, count: int) -> UserDefinedOperator:
+    operator_definition = ASTString().render(ast=child)
+    return UserDefinedOperator(
+        id=f"UDO{count}",
+        operator_definition=operator_definition,
+        name=f"UserDefinedOperator {child.op}",
+    )
+
+
+def __generate_ruleset(child: Union[DPRuleset, HRuleset], count: int) -> Ruleset:
+    ruleset_definition = ASTString().render(ast=child)
+    ruleset_type: Literal["datapoint", "hierarchical"] = (
+        "datapoint" if isinstance(child, DPRuleset) else "hierarchical"
+    )
+    return Ruleset(
+        id=f"R{count}",
+        ruleset_definition=ruleset_definition,
+        ruleset_type=ruleset_type,
+        name=f"{ruleset_type.capitalize()} ruleset {child.name}",
+    )
+
+
+def ast_to_sdmx(ast: AST.Start, agency_id: str, id: str, version: str) -> TransformationScheme:
+    """
+    Converts a vtl AST into an SDMX compatible `TransformationScheme` object, following
+    the pysdmx model.
+
+    This function iterates over the child nodes of the given AST and categorizes each into one of
+    the following types:
+    - `PersistentAssignment`: Represents a persistent transformation. These are added to the
+      transformation list with a persistence flag.
+    - `Assignment`: Represents a temporary (non-persistent) transformation. These are added to the
+      transformation list without the persistence flag
+    - `DPRuleset` or `HRuleset`: Represent validation rule sets.
+      These are collected and wrapped into a `RulesetScheme` object.
+    - `Operator`: Defines user-defined operators. These are collected
+      into a `UserDefinedOperatorScheme` object.
+
+    After parsing all AST elements:
+    - If any rulesets were found, a `RulesetScheme` is created and added to the references.
+    - If any user-defined operators were found, a `UserDefinedOperatorScheme` is created and added
+      to the references.
+    - A `TransformationScheme` object is constructed with all collected transformations and any
+      additional references.
+
+    Args:
+        ast: The root node of the vtl ast representing the set of
+            vtl expressions.
+        agency_id: The identifier of the agency defining the SDMX structure as a string.
+        id: The identifier of the transformation scheme as a string.
+        version: The version of the transformation scheme given as a string.
+
+    Returns:
+        TransformationScheme: A fully constructed transformation scheme that includes
+        transformations, and optionally rule sets and user-defined operator schemes,
+        suitable for SDMX.
+
+    """
+    list_transformation = []
+    list_udos = []
+    list_rulesets = []
+    count_transformation = 0
+    count_udo = 0
+    count_ruleset = 0
+
+    for child in ast.children:
+        if isinstance(child, PersistentAssignment):
+            count_transformation += 1
+            list_transformation.append(
+                __generate_transformation(
+                    child=child, is_persistent=True, count=count_transformation
+                )
+            )
+        elif isinstance(child, Assignment):
+            count_transformation += 1
+            list_transformation.append(
+                __generate_transformation(
+                    child=child, is_persistent=False, count=count_transformation
+                )
+            )
+        elif isinstance(child, (DPRuleset, HRuleset)):
+            count_ruleset += 1
+            list_rulesets.append(__generate_ruleset(child=child, count=count_ruleset))
+        elif isinstance(child, Operator):
+            count_udo += 1
+            list_udos.append(__generate_udo(child=child, count=count_udo))
+
+    references: Any = {}
+    if list_rulesets:
+        references["ruleset_schemes"] = [
+            RulesetScheme(
+                items=list_rulesets,
+                agency=agency_id,
+                id="RS1",
+                vtl_version="2.1",
+                version=version,
+                name=f"RulesetScheme {id}-RS",
+            )
+        ]
+    if list_udos:
+        references["user_defined_operator_schemes"] = [
+            UserDefinedOperatorScheme(
+                items=list_udos,
+                agency=agency_id,
+                id="UDS1",
+                vtl_version="2.1",
+                version=version,
+                name=f"UserDefinedOperatorScheme {id}-UDS",
+            )
+        ]
+
+    transformation_scheme = TransformationScheme(
+        items=list_transformation,
+        agency=agency_id,
+        id="TS1",
+        vtl_version="2.1",
+        version=version,
+        name=f"TransformationScheme {id}",
+        **references,
+    )
+
+    return transformation_scheme
+
+
+def _check_script(script: Union[str, TransformationScheme, Path]) -> str:
+    """
+    Check if the TransformationScheme object is valid to generate a vtl script.
+    """
+    if not isinstance(script, (str, TransformationScheme, Path)):
+        raise Exception(
+            "Invalid script format. Input must be a string, TransformationScheme or Path object"
+        )
+    if isinstance(script, TransformationScheme):
+        from pysdmx.toolkit.vtl.generate_vtl_script import (
+            generate_vtl_script,
+        )
+
+        vtl_script = generate_vtl_script(script, model_validation=True)
+        return vtl_script
+    else:
+        return str(script)
```