vtlengine 1.0.3rc3.tar.gz → 1.1rc1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of vtlengine might be problematic.
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/PKG-INFO +17 -17
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/pyproject.toml +45 -30
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/API/_InternalApi.py +64 -58
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/API/__init__.py +11 -2
- vtlengine-1.1rc1/src/vtlengine/API/data/schema/json_schema_2.1.json +116 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/ASTConstructor.py +5 -4
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/ASTConstructorModules/Expr.py +47 -48
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/ASTConstructorModules/ExprComponents.py +45 -23
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/ASTConstructorModules/Terminals.py +21 -11
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/ASTEncoders.py +1 -1
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/DAG/__init__.py +0 -3
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/Grammar/lexer.py +0 -1
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/Grammar/parser.py +185 -440
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/VtlVisitor.py +0 -1
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/DataTypes/TimeHandling.py +50 -15
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/DataTypes/__init__.py +79 -7
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Exceptions/__init__.py +3 -5
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Exceptions/messages.py +65 -105
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Interpreter/__init__.py +83 -38
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Model/__init__.py +7 -9
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/Aggregation.py +13 -7
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/Analytic.py +48 -9
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/Assignment.py +0 -1
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/CastOperator.py +44 -44
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/Clause.py +16 -10
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/Comparison.py +20 -12
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/Conditional.py +30 -13
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/General.py +9 -4
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/HROperators.py +4 -14
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/Join.py +15 -14
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/Numeric.py +32 -26
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/RoleSetter.py +6 -2
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/Set.py +12 -8
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/String.py +9 -9
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/Time.py +136 -116
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/Validation.py +10 -4
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/__init__.py +56 -69
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Utils/__init__.py +6 -1
- vtlengine-1.1rc1/src/vtlengine/__extras_check.py +17 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/files/output/__init__.py +2 -1
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/files/output/_time_period_representation.py +2 -1
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/files/parser/__init__.py +47 -31
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/files/parser/_rfc_dialect.py +1 -1
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/files/parser/_time_checking.py +4 -4
- vtlengine-1.0.3rc3/src/vtlengine/DataTypes/NumericTypesHandling.py +0 -38
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/LICENSE.md +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/README.md +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/ASTConstructorModules/__init__.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/ASTDataExchange.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/ASTTemplate.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/ASTVisitor.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/DAG/_words.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/Grammar/Vtl.g4 +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/Grammar/VtlTokens.g4 +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/Grammar/__init__.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/Grammar/tokens.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/AST/__init__.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/Operators/Boolean.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/__init__.py +0 -0
- {vtlengine-1.0.3rc3 → vtlengine-1.1rc1}/src/vtlengine/files/__init__.py +0 -0
--- vtlengine-1.0.3rc3/PKG-INFO
+++ vtlengine-1.1rc1/PKG-INFO
@@ -1,31 +1,31 @@
-Metadata-Version: 2.
+Metadata-Version: 2.3
 Name: vtlengine
-Version: 1.0.3rc3
+Version: 1.1rc1
 Summary: Run and Validate VTL Scripts
 License: AGPL-3.0
 Keywords: vtl,sdmx,vtlengine,Validation and Transformation Language
 Author: MeaningfulData
 Author-email: info@meaningfuldata.eu
-
+Maintainer: Francisco Javier Hernandez del Caño
+Maintainer-email: javier.hernandez@meaningfuldata.eu
+Requires-Python: >=3.9,<4
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Information Technology
 Classifier: Intended Audience :: Science/Research
-Classifier: License :: OSI Approved :: GNU Affero General Public License v3
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
 Classifier: Typing :: Typed
-
-
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
+Provides-Extra: all
+Provides-Extra: s3
+Requires-Dist: antlr4-python3-runtime (>=4.9.2,<4.10)
+Requires-Dist: duckdb (>=1.1,<1.2)
+Requires-Dist: fsspec (>=2022.11.0,<2023.0) ; extra == "all"
+Requires-Dist: fsspec (>=2022.11.0,<2023.0) ; extra == "s3"
+Requires-Dist: jsonschema (>=3.2.0,<5.0)
+Requires-Dist: networkx (>=2.8,<3.0)
+Requires-Dist: pandas (>=2.1,<3.0)
+Requires-Dist: s3fs (>=2022.11.0,<2023.0) ; extra == "all"
+Requires-Dist: s3fs (>=2022.11.0,<2023.0) ; extra == "s3"
+Requires-Dist: sqlglot (>=22.2.0,<23.0)
 Project-URL: Authors, https://github.com/Meaningful-Data/vtlengine/graphs/contributors
 Project-URL: Documentation, https://docs.vtlengine.meaningfuldata.eu
 Project-URL: IssueTracker, https://github.com/Meaningful-Data/vtlengine/issues
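The new packaging metadata gates S3 support behind the "s3" and "all" extras via environment markers. The extra-gated requirement strings can be inspected at runtime with the standard library; a minimal sketch, assuming vtlengine 1.1rc1 is installed:

    import importlib.metadata

    # Prints every declared requirement string, including extra-gated ones,
    # e.g. 's3fs (>=2022.11.0,<2023.0) ; extra == "s3"'.
    for requirement in importlib.metadata.requires("vtlengine") or []:
        print(requirement)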
--- vtlengine-1.0.3rc3/pyproject.toml
+++ vtlengine-1.1rc1/pyproject.toml
@@ -1,11 +1,18 @@
-[
+[project]
 name = "vtlengine"
-version = "1.0.3rc3"
+version = "1.1rc1"
 description = "Run and Validate VTL Scripts"
-authors = ["MeaningfulData <info@meaningfuldata.eu>"]
 license = "AGPL-3.0"
 readme = "README.md"
-
+requires-python = ">=3.9,<4"
+authors = [
+    {name = "MeaningfulData", email = "info@meaningfuldata.eu"},
+]
+maintainers = [
+    {name = "Francisco Javier Hernandez del Caño", email = "javier.hernandez@meaningfuldata.eu"},
+    {name = "Alberto Hernandez del Caño", email = "alberto.hernandez@meaningfuldata.eu"},
+    {name = "Mateo de Lorenzo Argeles", email = "mateo.delorenzo@meaningfuldata.eu"}
+]
 classifiers = [
     "Development Status :: 5 - Production/Stable",
     "Intended Audience :: Developers",
@@ -13,42 +20,44 @@ classifiers = [
     "Intended Audience :: Science/Research",
     "Typing :: Typed"
 ]
-
 keywords = ['vtl', 'sdmx', 'vtlengine', 'Validation and Transformation Language']
 
-[
+dependencies = [
+    # PyPi dependencies
+    "duckdb>=1.1,<1.2",
+
+    # APT-supported dependencies
+    "jsonschema>=3.2.0,<5.0",
+    "sqlglot>=22.2.0,<23.0",
+    "antlr4-python3-runtime>=4.9.2,<4.10",
+    "pandas>=2.1,<3.0",
+    "networkx>=2.8,<3.0",
+]
+
+[project.optional-dependencies]
+s3 = ["s3fs>=2022.11.0,<2023.0", "fsspec>=2022.11.0,<2023.0"]
+all = ["s3fs>=2022.11.0,<2023.0", "fsspec>=2022.11.0,<2023.0"]
+
+[project.urls]
 Repository = 'https://github.com/Meaningful-Data/vtlengine'
 Documentation = 'https://docs.vtlengine.meaningfuldata.eu'
 MeaningfulData = 'https://www.meaningfuldata.eu/'
 IssueTracker = 'https://github.com/Meaningful-Data/vtlengine/issues'
 Authors = 'https://github.com/Meaningful-Data/vtlengine/graphs/contributors'
 
-[tool.poetry.dependencies]
-
-
-duckdb = "^1.1.1"
-#numba = "^0.60.0"
-s3fs = "^2024.9.0"
-
-# APT dependencies
-antlr4-python3-runtime = "4.9.2"
-networkx = "^2.8.8"
-numexpr = "^2.9.0"
-pandas = "^2.1.4"
-bottleneck = "^1.3.4"
-sqlglot = "^22.2.0"
-
-[tool.poetry.dev-dependencies]
-pytest = "^7.3"
-pytest-cov = "^5.0.0"
+[tool.poetry.group.dev.dependencies]
+pytest = "^8.3"
+pytest-cov = "^6.0.0"
 line-profiler-pycharm = "^1.2.0"
-
-
-
-
-
+mypy = "1.14.1"
+pandas-stubs = "^2.1.4.231227"
+ruff = "^0.9.4"
+types-jsonschema = "4.23.0.20241208"
+
+[tool.poetry.group.docs.dependencies]
+sphinx = "^7.4.7"
+sphinx-rtd-theme = "^3.0.2"
 toml = "^0.10.2"
-ruff = "^0.7.1"
 
 [tool.ruff]
 line-length = 100
@@ -84,6 +93,12 @@ enable_error_code = [
 ]
 warn_return_any = false
 
+[tool.pytest.ini_options]
+addopts = "--strict-markers"
+markers = [
+    "input_path: directory where tests data files are stored"
+]
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
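The new [tool.pytest.ini_options] table registers a custom input_path marker and enables --strict-markers, so any unregistered marker now fails collection. How the test suite consumes the marker is not shown in this diff; a generic sketch of the usual pattern, with hypothetical test and fixture names:

    import pytest

    @pytest.fixture
    def input_path(request):
        # Pick up the directory attached to the test via the registered marker.
        marker = request.node.get_closest_marker("input_path")
        return None if marker is None else marker.args[0]

    @pytest.mark.input_path("tests/data")
    def test_load_inputs(input_path):
        assert input_path == "tests/data"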
--- vtlengine-1.0.3rc3/src/vtlengine/API/_InternalApi.py
+++ vtlengine-1.1rc1/src/vtlengine/API/_InternalApi.py
@@ -3,12 +3,13 @@ import os
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
+import jsonschema
 import pandas as pd
-from s3fs import S3FileSystem  # type: ignore[import-untyped]
 
+from vtlengine.__extras_check import __check_s3_extra
 from vtlengine.AST import PersistentAssignment, Start
 from vtlengine.DataTypes import SCALAR_TYPES
-from vtlengine.Exceptions import check_key
+from vtlengine.Exceptions import InputValidationException, check_key
 from vtlengine.files.parser import _fill_dataset_empty_data, _validate_pandas
 from vtlengine.Model import (
     Component,
@@ -21,13 +22,9 @@ from vtlengine.Model import (
 )
 
 base_path = Path(__file__).parent
-
-
-
-filepath_json = base_path / "data" / "DataStructure" / "input"
-filepath_csv = base_path / "data" / "DataSet" / "input"
-filepath_out_json = base_path / "data" / "DataStructure" / "output"
-filepath_out_csv = base_path / "data" / "DataSet" / "output"
+schema_path = base_path / "data" / "schema"
+with open(schema_path / "json_schema_2.1.json", "r") as file:
+    schema = json.load(file)
 
 
 def _load_dataset_from_structure(structures: Dict[str, Any]) -> Dict[str, Any]:
@@ -41,22 +38,60 @@ def _load_dataset_from_structure(structures: Dict[str, Any]) -> Dict[str, Any]:
         dataset_name = dataset_json["name"]
         components = {}
 
-
-
-
-
-
-
-
-
-
+        if "structure" in dataset_json:
+            structure_name = dataset_json["structure"]
+            structure_json = None
+            for s in structures["structures"]:
+                if s["name"] == structure_name:
+                    structure_json = s
+            if structure_json is None:
+                raise InputValidationException(code="0-3-1-1", message="Structure not found.")
+            try:
+                jsonschema.validate(instance=structure_json, schema=schema)
+            except jsonschema.exceptions.ValidationError as e:
+                raise InputValidationException(code="0-3-1-1", message=e.message)
+
+            for component in structure_json["components"]:
+                check_key("data_type", SCALAR_TYPES.keys(), component["data_type"])
+                if component["role"] == "ViralAttribute":
+                    component["role"] = "Attribute"
+
+                check_key("role", Role_keys, component["role"])
+
+                if "nullable" not in component:
+                    if Role(component["role"]) == Role.IDENTIFIER:
+                        component["nullable"] = False
+                    elif Role(component["role"]) in (Role.MEASURE, Role.ATTRIBUTE):
+                        component["nullable"] = True
+                    else:
+                        component["nullable"] = False
+
+                components[component["name"]] = Component(
+                    name=component["name"],
+                    data_type=SCALAR_TYPES[component["data_type"]],
+                    role=Role(component["role"]),
+                    nullable=component["nullable"],
+                )
+
+        if "DataStructure" in dataset_json:
+            for component in dataset_json["DataStructure"]:
+                check_key("data_type", SCALAR_TYPES.keys(), component["type"])
+                check_key("role", Role_keys, component["role"])
+                components[component["name"]] = Component(
+                    name=component["name"],
+                    data_type=SCALAR_TYPES[component["type"]],
+                    role=Role(component["role"]),
+                    nullable=component["nullable"],
+                )
 
         datasets[dataset_name] = Dataset(name=dataset_name, components=components, data=None)
     if "scalars" in structures:
         for scalar_json in structures["scalars"]:
             scalar_name = scalar_json["name"]
             scalar = Scalar(
-                name=scalar_name,
+                name=scalar_name,
+                data_type=SCALAR_TYPES[scalar_json["type"]],
+                value=None,
             )
             datasets[scalar_name] = scalar  # type: ignore[assignment]
     return datasets
@@ -70,38 +105,16 @@ def _load_single_datapoint(datapoint: Union[str, Path]) -> Dict[str, Any]:
         raise Exception("Invalid datapoint. Input must be a Path or an S3 URI")
     if isinstance(datapoint, str):
         if "s3://" in datapoint:
-
-            s3fs_obj = S3FileSystem()
-
-            # Check if the S3 URI is valid
-            if not s3fs_obj.exists(datapoint):
-                raise Exception(
-                    f"Invalid datapoint. S3 URI does not exist or it is not accessible: {datapoint}"
-                )
-
-            # Check if the S3 URI is a directory
-            if s3fs_obj.isdir(datapoint):
-                datapoints: Dict[str, Any] = {}
-                for f in s3fs_obj.ls(datapoint):
-                    if f.endswith(".csv"):
-                        dataset_name = f.split("/")[-1].removesuffix(".csv")
-                        dict_data = {dataset_name: f"s3://{f}"}
-                        datapoints = {**datapoints, **dict_data}
-                return datapoints
-
-            # Check if the S3 URI is a csv file
-            if s3fs_obj.isfile(datapoint) and not datapoint.endswith(".csv"):
-                raise Exception(f"Invalid datapoint. S3 URI must refer to a csv file: {datapoint}")
+            __check_s3_extra()
             dataset_name = datapoint.split("/")[-1].removesuffix(".csv")
             dict_data = {dataset_name: datapoint}
             return dict_data
-
     try:
         datapoint = Path(datapoint)
     except Exception:
         raise Exception("Invalid datapoint. Input must refer to a Path or an S3 URI")
     if datapoint.is_dir():
-        datapoints = {}
+        datapoints: Dict[str, Any] = {}
         for f in datapoint.iterdir():
             if f.suffix != ".csv":
                 continue
@@ -115,7 +128,7 @@ def _load_single_datapoint(datapoint: Union[str, Path]) -> Dict[str, Any]:
 
 
 def _load_datapoints_path(
-    datapoints: Union[Path, str, List[Union[str, Path]]]
+    datapoints: Union[Path, str, List[Union[str, Path]]],
 ) -> Dict[str, Dataset]:
     """
     Returns a dict with the data given from a Path.
@@ -156,7 +169,7 @@ def _load_datastructure_single(data_structure: Union[Dict[str, Any], Path]) -> D
 
 
 def load_datasets(
-    data_structure: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]]
+    data_structure: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
 ) -> Dict[str, Dataset]:
     """
     Loads multiple datasets.
@@ -365,25 +378,18 @@ def _check_output_folder(output_folder: Union[str, Path]) -> None:
     """
     if isinstance(output_folder, str):
         if "s3://" in output_folder:
-
-
-
-            try:
-                s3fs_obj.mkdir(output_folder)
-            except Exception:
-                raise Exception(
-                    f"Invalid output folder. S3 URI is invalid or "
-                    f"it is not accessible: {output_folder}"
-                )
+            __check_s3_extra()
+            if not output_folder.endswith("/"):
+                raise ValueError("Output folder must be a Path or S3 URI to a directory")
             return
     try:
         output_folder = Path(output_folder)
     except Exception:
-        raise
+        raise ValueError("Output folder must be a Path or S3 URI to a directory")
 
     if not isinstance(output_folder, Path):
-        raise
+        raise ValueError("Output folder must be a Path or S3 URI to a directory")
     if not output_folder.exists():
         if output_folder.suffix != "":
-            raise
+            raise ValueError("Output folder must be a Path or S3 URI to a directory")
         os.mkdir(output_folder)
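_InternalApi.py now calls __check_s3_extra() before touching any s3:// path, replacing the unconditional s3fs import. The body of the new src/vtlengine/__extras_check.py (+17 lines) is not shown in this diff; a plausible reconstruction, assuming it simply verifies that the optional dependencies from the "s3" extra are importable:

    from importlib.util import find_spec

    def __check_s3_extra() -> None:
        # Hypothetical sketch: fail fast with an actionable message when the
        # optional S3 dependencies are missing.
        for package in ("s3fs", "fsspec"):
            if find_spec(package) is None:
                raise ImportError(
                    f"{package} is required to read s3:// paths. "
                    'Install it with: pip install "vtlengine[s3]"'
                )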
--- vtlengine-1.0.3rc3/src/vtlengine/API/__init__.py
+++ vtlengine-1.1rc1/src/vtlengine/API/__init__.py
@@ -32,7 +32,13 @@ class __VTLSingleErrorListener(ErrorListener):  # type: ignore[misc]
     """ """
 
     def syntaxError(
-        self,
+        self,
+        recognizer: Any,
+        offendingSymbol: str,
+        line: str,
+        column: str,
+        msg: str,
+        e: Any,
     ) -> None:
         raise Exception(
             f"Not valid VTL Syntax \n "
@@ -150,7 +156,10 @@ def semantic_analysis(
 
     # Running the interpreter
     interpreter = InterpreterAnalyzer(
-        datasets=structures,
+        datasets=structures,
+        value_domains=vd,
+        external_routines=ext_routines,
+        only_semantic=True,
     )
     with pd.option_context("future.no_silent_downcasting", True):
         result = interpreter.visit(ast)
--- /dev/null
+++ vtlengine-1.1rc1/src/vtlengine/API/data/schema/json_schema_2.1.json
@@ -0,0 +1,116 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "description": "VTL Metadata JSON serialization",
+  "$defs": {
+    "vtl-id": {
+      "type": "string",
+      "pattern": "^[a-zA-Z][a-zA-Z0-9_]*$|^'.*'$"
+    },
+    "set-type": {
+      "type": "array",
+      "uniqueItems": true,
+      "oneOf": [
+        { "items": { "oneOf": [ { "type": "string" }, { "type": "null" } ] } },
+        { "items": { "oneOf": [ { "type": "number" }, { "type": "null" } ] } }
+      ]
+    },
+    "identifiable": {
+      "type": "object",
+      "properties": {
+        "name": { "$ref": "#/$defs/vtl-id" },
+        "description": { "type": "string" }
+      },
+      "required": [ "name" ]
+    }
+  },
+  "type": "object",
+  "properties": {
+    "datasets": {
+      "type": "array",
+      "items": {
+        "allOf": [ { "$ref": "#/$defs/identifiable" } ],
+        "properties": {
+          "source": { "type": "string" },
+          "structure": { "$ref": "#/$defs/vtl-id" }
+        },
+        "required": [ "structure" ]
+      }
+    },
+    "structures": {
+      "type": "array",
+      "items": {
+        "allOf": [ { "$ref": "#/$defs/identifiable" } ],
+        "properties": {
+          "components": {
+            "type": "array",
+            "items": {
+              "allOf": [ { "$ref": "#/$defs/identifiable" } ],
+              "properties": {
+                "role": {
+                  "type": "string",
+                  "enum": [ "Identifier", "Measure", "Attribute", "Viral Attribute" ]
+                },
+                "subset": { "$ref": "#/$defs/vtl-id" },
+                "nullable": { "type": "boolean" },
+                "data_type": {
+                  "type": "string",
+                  "enum": [ "String", "Number", "Integer", "Boolean", "Time", "TimePeriod", "Date", "Duration" ]
+                }
+              },
+              "required": [ "role" ]
+            }
+          }
+        },
+        "required": [ "components" ]
+      }
+    },
+    "variables": {
+      "type": "array",
+      "items": {
+        "allOf": [ { "$ref": "#/$defs/identifiable" } ],
+        "properties": {
+          "domain": { "$ref": "#/$defs/vtl-id" }
+        },
+        "required": [ "domain" ]
+      }
+    },
+    "domains": {
+      "type": "array",
+      "items": {
+        "allOf": [ { "$ref": "#/$defs/identifiable" } ],
+        "unevaluatedProperties": false,
+        "oneOf": [
+          {
+            "properties": {
+              "externalRef": { "type": "string" }
+            },
+            "required": [ "externalRef" ]
+          }, {
+            "properties": {
+              "parent": { "$ref": "#/$defs/vtl-id" }
+            },
+            "required": [ "parent" ],
+            "oneOf": [{
+              "properties": {
+                "restriction": { "$ref": "#/$defs/set-type" }
+              },
+              "required": [ "restriction" ]
+            }, {
+              "properties": {
+                "enumerated": { "$ref": "#/$defs/set-type" }
+              },
+              "required": [ "enumerated" ]
+            }, {
+              "properties": {
+                "described": { "type": "string" }
+              },
+              "required": [ "described" ]
+            }
+          ]
+        }
+      ]
+    }
+  }
+}
+
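This bundled schema is what _InternalApi.py now passes to jsonschema.validate for each structure entry. A minimal metadata document in the shape the schema describes (the DS_1/STR_1 names and the file location are illustrative):

    import json
    from pathlib import Path

    import jsonschema

    metadata = {
        "datasets": [{"name": "DS_1", "structure": "STR_1"}],
        "structures": [{
            "name": "STR_1",
            "components": [
                {"name": "Id_1", "role": "Identifier", "data_type": "Integer", "nullable": False},
                {"name": "Me_1", "role": "Measure", "data_type": "Number"},
            ],
        }],
    }

    schema = json.loads(Path("json_schema_2.1.json").read_text())
    jsonschema.validate(instance=metadata, schema=schema)  # raises ValidationError on bad input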
--- vtlengine-1.0.3rc3/src/vtlengine/AST/ASTConstructor.py
+++ vtlengine-1.1rc1/src/vtlengine/AST/ASTConstructor.py
@@ -382,7 +382,8 @@ class ASTVisitor(VtlVisitor):
         if conditions:
             identifiers_list = [
                 DefIdentifier(
-                    value=elto.alias if getattr(elto, "alias", None) else elto.value,
+                    value=elto.alias if getattr(elto, "alias", None) else elto.value,
+                    kind=kind,
                 )
                 for elto in conditions[0]
             ]
@@ -395,7 +396,7 @@ class ASTVisitor(VtlVisitor):
     def visitValueDomainSignature(self, ctx: Parser.ValueDomainSignatureContext):
         """
         valueDomainSignature: CONDITION IDENTIFIER (AS IDENTIFIER)? (',' IDENTIFIER (AS IDENTIFIER)?)* ;
-        """
+        """  # noqa E501
         # AST_ASTCONSTRUCTOR.7
         ctx_list = list(ctx.getChildren())
         component_nodes = [
@@ -459,7 +460,7 @@ class ASTVisitor(VtlVisitor):
         codeItemRelation: ( WHEN expr THEN )? codeItemRef codeItemRelationClause (codeItemRelationClause)* ;
         ( WHEN exprComponent THEN )? codetemRef=valueDomainValue comparisonOperand? codeItemRelationClause (codeItemRelationClause)*
 
-        """
+        """  # noqa E501
 
         ctx_list = list(ctx.getChildren())
 
@@ -512,7 +513,7 @@ class ASTVisitor(VtlVisitor):
     def visitCodeItemRelationClause(self, ctx: Parser.CodeItemRelationClauseContext):
         """
         (opAdd=( PLUS | MINUS ))? rightCodeItem=valueDomainValue ( QLPAREN rightCondition=exprComponent QRPAREN )?
-        """
+        """  # noqa E501
         ctx_list = list(ctx.getChildren())
 
         expr = [expr for expr in ctx_list if isinstance(expr, Parser.ExprContext)]