vtlengine 1.0.4__py3-none-any.whl → 1.1rc1__py3-none-any.whl
This diff compares the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
Potentially problematic release. This version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +9 -38
- vtlengine/__extras_check.py +17 -0
- vtlengine/files/output/__init__.py +2 -0
- vtlengine/files/parser/__init__.py +8 -26
- {vtlengine-1.0.4.dist-info → vtlengine-1.1rc1.dist-info}/METADATA +16 -17
- {vtlengine-1.0.4.dist-info → vtlengine-1.1rc1.dist-info}/RECORD +8 -7
- {vtlengine-1.0.4.dist-info → vtlengine-1.1rc1.dist-info}/LICENSE.md +0 -0
- {vtlengine-1.0.4.dist-info → vtlengine-1.1rc1.dist-info}/WHEEL +0 -0
vtlengine/API/_InternalApi.py
CHANGED
@@ -5,8 +5,8 @@ from typing import Any, Dict, List, Optional, Union
 
 import jsonschema
 import pandas as pd
-from s3fs import S3FileSystem  # type: ignore[import-untyped]
 
+from vtlengine.__extras_check import __check_s3_extra
 from vtlengine.AST import PersistentAssignment, Start
 from vtlengine.DataTypes import SCALAR_TYPES
 from vtlengine.Exceptions import InputValidationException, check_key
@@ -105,38 +105,16 @@ def _load_single_datapoint(datapoint: Union[str, Path]) -> Dict[str, Any]:
         raise Exception("Invalid datapoint. Input must be a Path or an S3 URI")
     if isinstance(datapoint, str):
         if "s3://" in datapoint:
-
-            s3fs_obj = S3FileSystem()
-
-            # Check if the S3 URI is valid
-            if not s3fs_obj.exists(datapoint):
-                raise Exception(
-                    f"Invalid datapoint. S3 URI does not exist or it is not accessible: {datapoint}"
-                )
-
-            # Check if the S3 URI is a directory
-            if s3fs_obj.isdir(datapoint):
-                datapoints: Dict[str, Any] = {}
-                for f in s3fs_obj.ls(datapoint):
-                    if f.endswith(".csv"):
-                        dataset_name = f.split("/")[-1].removesuffix(".csv")
-                        dict_data = {dataset_name: f"s3://{f}"}
-                        datapoints = {**datapoints, **dict_data}
-                return datapoints
-
-            # Check if the S3 URI is a csv file
-            if s3fs_obj.isfile(datapoint) and not datapoint.endswith(".csv"):
-                raise Exception(f"Invalid datapoint. S3 URI must refer to a csv file: {datapoint}")
+            __check_s3_extra()
             dataset_name = datapoint.split("/")[-1].removesuffix(".csv")
             dict_data = {dataset_name: datapoint}
             return dict_data
-
     try:
         datapoint = Path(datapoint)
     except Exception:
         raise Exception("Invalid datapoint. Input must refer to a Path or an S3 URI")
     if datapoint.is_dir():
-        datapoints = {}
+        datapoints: Dict[str, Any] = {}
         for f in datapoint.iterdir():
             if f.suffix != ".csv":
                 continue
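Note: the S3 branch no longer probes the bucket with s3fs; it only checks that the s3 extra is installed and derives the dataset name from the URI itself. A minimal sketch of that derivation, with a hypothetical bucket and key:

datapoint = "s3://my-bucket/data/DS_1.csv"  # hypothetical S3 URI
dataset_name = datapoint.split("/")[-1].removesuffix(".csv")
assert dataset_name == "DS_1"
# _load_single_datapoint now returns {"DS_1": "s3://my-bucket/data/DS_1.csv"}

Existence and file-type checks are deferred to the actual read, so an invalid URI now fails at load time rather than at registration time.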
@@ -400,25 +378,18 @@ def _check_output_folder(output_folder: Union[str, Path]) -> None:
     """
     if isinstance(output_folder, str):
         if "s3://" in output_folder:
-
-
-
-            try:
-                s3fs_obj.mkdir(output_folder)
-            except Exception:
-                raise Exception(
-                    f"Invalid output folder. S3 URI is invalid or "
-                    f"it is not accessible: {output_folder}"
-                )
+            __check_s3_extra()
+            if not output_folder.endswith("/"):
+                raise ValueError("Output folder must be a Path or S3 URI to a directory")
             return
     try:
         output_folder = Path(output_folder)
     except Exception:
-        raise
+        raise ValueError("Output folder must be a Path or S3 URI to a directory")
 
     if not isinstance(output_folder, Path):
-        raise
+        raise ValueError("Output folder must be a Path or S3 URI to a directory")
     if not output_folder.exists():
         if output_folder.suffix != "":
-            raise
+            raise ValueError("Output folder must be a Path or S3 URI to a directory")
         os.mkdir(output_folder)
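Note: the eager `s3fs_obj.mkdir` probe is replaced by a purely syntactic check: a string output folder must be an S3 URI ending in "/". A sketch of the new contract, with a hypothetical helper name:

def is_valid_s3_output_folder(uri: str) -> bool:  # hypothetical, mirrors the new check
    return "s3://" in uri and uri.endswith("/")

assert is_valid_s3_output_folder("s3://my-bucket/results/")
assert not is_valid_s3_output_folder("s3://my-bucket/results")  # the engine raises ValueError here

Bare `raise` statements outside an except block were bugs (they raise RuntimeError: No active exception to re-raise); they are now explicit ValueErrors with a uniform message.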
vtlengine/__extras_check.py
ADDED

@@ -0,0 +1,17 @@
+import importlib.util
+
+EXTRAS_DOCS = "https://docs.vtlengine.meaningfuldata.eu/#installation"
+ERROR_MESSAGE = (
+    "The '{extra_name}' extra is required to run {extra_desc}. "
+    "Please install it using 'pip install vtlengine[{extra_name}]' or "
+    "install all extras with 'pip install vtlengine[all]'. "
+    f"Check the documentation at: {EXTRAS_DOCS}"
+)
+
+
+def __check_s3_extra() -> None:
+    package_loc = importlib.util.find_spec("s3fs")
+    if package_loc is None:
+        raise ImportError(
+            ERROR_MESSAGE.format(extra_name="s3", extra_desc="over csv files using S3 URIs")
+        ) from None
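Note: `importlib.util.find_spec` returns None when a distribution is not importable, which lets the engine fail fast with an actionable message instead of a bare ModuleNotFoundError deep inside pandas/fsspec; `from None` keeps the traceback free of misleading chained context. A usage sketch, assuming s3fs is NOT installed in the environment:

from vtlengine.__extras_check import __check_s3_extra

try:
    __check_s3_extra()
except ImportError as err:
    print(err)  # "The 's3' extra is required to run over csv files using S3 URIs. ..."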
vtlengine/files/output/__init__.py
CHANGED

@@ -3,6 +3,7 @@ from typing import Optional, Union
 
 import pandas as pd
 
+from vtlengine.__extras_check import __check_s3_extra
 from vtlengine.files.output._time_period_representation import (
     TimePeriodRepresentation,
     format_time_period_external_representation,

@@ -20,6 +21,7 @@ def save_datapoints(
     if time_period_representation is not None:
         format_time_period_external_representation(dataset, time_period_representation)
     if isinstance(output_path, str):
+        __check_s3_extra()
        if output_path.endswith("/"):
            s3_file_output = output_path + f"{dataset.name}.csv"
        else:
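Note: with the extras check in place, the S3 output file name is still built by plain string concatenation. A sketch with hypothetical values:

output_path = "s3://my-bucket/results/"  # hypothetical output folder
dataset_name = "DS_1"
if output_path.endswith("/"):
    s3_file_output = output_path + f"{dataset_name}.csv"
assert s3_file_output == "s3://my-bucket/results/DS_1.csv"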
vtlengine/files/parser/__init__.py
CHANGED

@@ -109,7 +109,7 @@ def _sanitize_pandas_columns(
     return data
 
 
-def _pandas_load_csv(components: Dict[str, Component], csv_path: Path) -> pd.DataFrame:
+def _pandas_load_csv(components: Dict[str, Component], csv_path: Union[str, Path]) -> pd.DataFrame:
     obj_dtypes = {comp_name: np.object_ for comp_name, comp in components.items()}
 
     try:

@@ -121,31 +121,14 @@ def _pandas_load_csv(components: Dict[str, Component], csv_path: Path) -> pd.DataFrame:
             na_values=[""],
         )
     except UnicodeDecodeError:
-
+        if isinstance(csv_path, Path):
+            raise InputValidationException(code="0-1-2-5", file=csv_path.name)
+        else:
+            raise InputValidationException(code="0-1-2-5", file=csv_path)
 
     return _sanitize_pandas_columns(components, csv_path, data)
 
 
-def _pandas_load_s3_csv(components: Dict[str, Component], csv_path: str) -> pd.DataFrame:
-    obj_dtypes = {comp_name: np.object_ for comp_name, comp in components.items()}
-
-    # start = time()
-    try:
-        data = pd.read_csv(
-            csv_path,
-            dtype=obj_dtypes,
-            engine="c",
-            keep_default_na=False,
-            na_values=[""],
-        )
-
-    except UnicodeDecodeError:
-        raise InputValidationException(code="0-1-2-5", file=csv_path)
-    except Exception as e:
-        raise InputValidationException(f"ERROR: {str(e)}, review file {str(csv_path)}")
-    return _sanitize_pandas_columns(components, csv_path, data)
-
-
 def _parse_boolean(value: str) -> bool:
     if isinstance(value, bool):
         return value
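Note: the dedicated S3 loader can be dropped because pandas delegates non-local paths to fsspec, so a single read_csv call handles both cases once s3fs is installed. A sketch under that assumption (paths hypothetical):

import pandas as pd

kwargs = dict(dtype=object, engine="c", keep_default_na=False, na_values=[""])
local = pd.read_csv("datapoints/DS_1.csv", **kwargs)       # local file path
remote = pd.read_csv("s3://my-bucket/DS_1.csv", **kwargs)  # fsspec/s3fs resolves the URI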
@@ -240,10 +223,9 @@ def load_datapoints(
 ) -> pd.DataFrame:
     if csv_path is None or (isinstance(csv_path, Path) and not csv_path.exists()):
         return pd.DataFrame(columns=list(components.keys()))
-    elif isinstance(csv_path, str):
-
-
-        _validate_csv_path(components, csv_path)
+    elif isinstance(csv_path, (str, Path)):
+        if isinstance(csv_path, Path):
+            _validate_csv_path(components, csv_path)
         data = _pandas_load_csv(components, csv_path)
     else:
         raise Exception("Invalid csv_path type")
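Note: the resulting control flow of load_datapoints, sketched as a standalone function (hypothetical name, mirroring the branches above):

from pathlib import Path
from typing import Optional, Union

def dispatch(csv_path: Optional[Union[str, Path]]) -> str:  # illustration only
    if csv_path is None or (isinstance(csv_path, Path) and not csv_path.exists()):
        return "empty dataframe with component columns"
    elif isinstance(csv_path, (str, Path)):
        if isinstance(csv_path, Path):
            return "validate locally, then load"
        return "load directly (e.g. an S3 URI)"
    raise Exception("Invalid csv_path type")

assert dispatch("s3://my-bucket/DS_1.csv") == "load directly (e.g. an S3 URI)"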
{vtlengine-1.0.4.dist-info → vtlengine-1.1rc1.dist-info}/METADATA
CHANGED

@@ -1,32 +1,31 @@
 Metadata-Version: 2.3
 Name: vtlengine
-Version: 1.0.4
+Version: 1.1rc1
 Summary: Run and Validate VTL Scripts
 License: AGPL-3.0
 Keywords: vtl,sdmx,vtlengine,Validation and Transformation Language
 Author: MeaningfulData
 Author-email: info@meaningfuldata.eu
-
+Maintainer: Francisco Javier Hernandez del Caño
+Maintainer-email: javier.hernandez@meaningfuldata.eu
+Requires-Python: >=3.9,<4
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Information Technology
 Classifier: Intended Audience :: Science/Research
-Classifier: License :: OSI Approved :: GNU Affero General Public License v3
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
 Classifier: Typing :: Typed
-
-
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
+Provides-Extra: all
+Provides-Extra: s3
+Requires-Dist: antlr4-python3-runtime (>=4.9.2,<4.10)
+Requires-Dist: duckdb (>=1.1,<1.2)
+Requires-Dist: fsspec (>=2022.11.0,<2023.0) ; extra == "all"
+Requires-Dist: fsspec (>=2022.11.0,<2023.0) ; extra == "s3"
+Requires-Dist: jsonschema (>=3.2.0,<5.0)
+Requires-Dist: networkx (>=2.8,<3.0)
+Requires-Dist: pandas (>=2.1,<3.0)
+Requires-Dist: s3fs (>=2022.11.0,<2023.0) ; extra == "all"
+Requires-Dist: s3fs (>=2022.11.0,<2023.0) ; extra == "s3"
+Requires-Dist: sqlglot (>=22.2.0,<23.0)
 Project-URL: Authors, https://github.com/Meaningful-Data/vtlengine/graphs/contributors
 Project-URL: Documentation, https://docs.vtlengine.meaningfuldata.eu
 Project-URL: IssueTracker, https://github.com/Meaningful-Data/vtlengine/issues
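Note: the new Provides-Extra markers make S3 support opt-in. 'pip install vtlengine' installs the core engine only, while 'pip install vtlengine[s3]' (or 'vtlengine[all]') pulls in fsspec and s3fs for reading and writing csv files over S3 URIs, matching the hint in __extras_check.ERROR_MESSAGE.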
{vtlengine-1.0.4.dist-info → vtlengine-1.1rc1.dist-info}/RECORD
CHANGED

@@ -1,4 +1,4 @@
-vtlengine/API/_InternalApi.py,sha256=
+vtlengine/API/_InternalApi.py,sha256=fz_JSIrLdXd29Tbg7FI6uZQE1wdOTlJPSZ27tPXHjeM,14825
 vtlengine/API/__init__.py,sha256=2IDUvvSJdbkL5It8JulhmPCgkEnNbw52_VbbWm_aRp0,11061
 vtlengine/API/data/schema/json_schema_2.1.json,sha256=v3-C0Xnq8qScJSPAtLgb3rjKMrd3nz-bIxgZdTSEUiU,4336
 vtlengine/AST/ASTConstructor.py,sha256=DdE0B6CyPt1RYb3he6L0tL-KhZ1UyHRxQisGC1GuKx8,19692

@@ -45,14 +45,15 @@ vtlengine/Operators/Time.py,sha256=9f2kQ6iAoA4YPvlfphJ_uQjM-ZuqjSnOs312ttWMhgg,4
 vtlengine/Operators/Validation.py,sha256=ev3HyU7e1XbeAtUQ1y6zY3fzBwMqetDPhG3NNveAGOE,9988
 vtlengine/Operators/__init__.py,sha256=GN5eaAwmzfYKD7JJRIaRqdIJzflGc3UMvrOC9mlYNVo,37227
 vtlengine/Utils/__init__.py,sha256=ZobqGLc4rpMrsmniexTD4J-VokQt3qLrBGdFEDHHT1M,7571
+vtlengine/__extras_check.py,sha256=Wr-lxGZhXJZEacVV5cUkvKt7XM-mry0kYAe3VxNrVcY,614
 vtlengine/__init__.py,sha256=L9tGzRGQ8HMDS23sVWIbBvj41sXR89pf0ZMzEidIEMM,89
 vtlengine/files/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-vtlengine/files/output/__init__.py,sha256=
+vtlengine/files/output/__init__.py,sha256=4tmf-p1Y1u5Ohrwt3clQA-FMGaijKI3HC_iwn3H9J8c,1250
 vtlengine/files/output/_time_period_representation.py,sha256=D5XCSXyEuX_aBzTvBV3sZxACcgwXz2Uu_YH3loMP8q0,1610
-vtlengine/files/parser/__init__.py,sha256=
+vtlengine/files/parser/__init__.py,sha256=Kt1hFk6El0B2Fpi3sSC34x4r9BfG6V2pwDv67D0Z3xg,9057
 vtlengine/files/parser/_rfc_dialect.py,sha256=0T8GshGA5z9ZgYStH7zz2ZwtdiGkj7B8jXcxsPkXfjs,488
 vtlengine/files/parser/_time_checking.py,sha256=UAC_Pv-eQJKrhgTguWb--xfqMMs6quyMeiAkGBt_vgI,4725
-vtlengine-1.
-vtlengine-1.
-vtlengine-1.
-vtlengine-1.
+vtlengine-1.1rc1.dist-info/LICENSE.md,sha256=2xqHuoHohba7gpcZZKtOICRjzeKsQANXG8WoV9V35KM,33893
+vtlengine-1.1rc1.dist-info/METADATA,sha256=M6qsKt4xzOOOiQ7TdfDnqcSxLeqgqGouytXDAdRR3o0,8726
+vtlengine-1.1rc1.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+vtlengine-1.1rc1.dist-info/RECORD,,
File without changes
|
|
File without changes
|