vtlengine 1.0-py3-none-any.whl → 1.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +159 -102
- vtlengine/API/__init__.py +110 -68
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +402 -205
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +248 -104
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +24 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/Vtl.g4 +49 -20
- vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
- vtlengine/AST/Grammar/lexer.py +2012 -1312
- vtlengine/AST/Grammar/parser.py +7524 -4343
- vtlengine/AST/Grammar/tokens.py +140 -128
- vtlengine/AST/VtlVisitor.py +16 -5
- vtlengine/AST/__init__.py +41 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +196 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +96 -27
- vtlengine/Exceptions/messages.py +149 -69
- vtlengine/Interpreter/__init__.py +817 -497
- vtlengine/Model/__init__.py +172 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +167 -79
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +290 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +129 -46
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +467 -215
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +232 -41
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +79 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +48 -37
- vtlengine-1.0.2.dist-info/METADATA +245 -0
- vtlengine-1.0.2.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/LICENSE.md +0 -0
vtlengine/API/_InternalApi.py
CHANGED
@@ -1,14 +1,16 @@
 import json
 import os
 from pathlib import Path
-from typing import Union, Optional, Dict, List
+from typing import Union, Optional, Dict, List, Any

 import pandas as pd
-from s3fs import S3FileSystem
+from s3fs import S3FileSystem  # type: ignore[import-untyped]

 from vtlengine.AST import PersistentAssignment, Start
 from vtlengine.DataTypes import SCALAR_TYPES
-from vtlengine.
+from vtlengine.Exceptions import check_key
+from vtlengine.Model import (ValueDomain, Dataset, Scalar, Component, Role,
+                             ExternalRoutine, Role_keys)
 from vtlengine.files.parser import _validate_pandas, _fill_dataset_empty_data

 base_path = Path(__file__).parent
@@ -21,92 +23,98 @@ filepath_out_json = base_path / "data" / "DataStructure" / "output"
 filepath_out_csv = base_path / "data" / "DataSet" / "output"


-def _load_dataset_from_structure(structures:
+def _load_dataset_from_structure(structures: Dict[str, Any]) -> Dict[str, Any]:
     """
     Loads a dataset with the structure given.
     """
     datasets = {}

-    if
-    for dataset_json in structures[
-    dataset_name = dataset_json[
-    components = {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if "datasets" in structures:
+        for dataset_json in structures["datasets"]:
+            dataset_name = dataset_json["name"]
+            components = {}
+
+            for component in dataset_json["DataStructure"]:
+                check_key("data_type", SCALAR_TYPES.keys(), component["type"])
+                check_key("role", Role_keys, component["role"])
+                components[component["name"]] = Component(
+                    name=component["name"],
+                    data_type=SCALAR_TYPES[component["type"]],
+                    role=Role(component["role"]),
+                    nullable=component["nullable"],
+                )
+
+            datasets[dataset_name] = Dataset(name=dataset_name, components=components, data=None)
+    if "scalars" in structures:
+        for scalar_json in structures["scalars"]:
+            scalar_name = scalar_json["name"]
+            scalar = Scalar(
+                name=scalar_name, data_type=SCALAR_TYPES[scalar_json["type"]], value=None
+            )
+            datasets[scalar_name] = scalar  # type: ignore[assignment]
     return datasets


-def _load_single_datapoint(datapoint: Union[str, Path]):
+def _load_single_datapoint(datapoint: Union[str, Path]) -> Dict[str, Any]:
     """
     Returns a dict with the data given from one dataset.
     """
     if not isinstance(datapoint, (Path, str)):
-        raise Exception(
+        raise Exception("Invalid datapoint. Input must be a Path or an S3 URI")
     if isinstance(datapoint, str):
-        if
+        if "s3://" in datapoint:
             # Handling S3 URI
             s3fs_obj = S3FileSystem()

             # Check if the S3 URI is valid
             if not s3fs_obj.exists(datapoint):
                 raise Exception(
-                    f
+                    f"Invalid datapoint. S3 URI does not exist or it is not accessible: {datapoint}"
+                )

             # Check if the S3 URI is a directory
             if s3fs_obj.isdir(datapoint):
-                datapoints = {}
+                datapoints: Dict[str, Any] = {}
                 for f in s3fs_obj.ls(datapoint):
-                    if f.endswith(
-                        dataset_name = f.split(
+                    if f.endswith(".csv"):
+                        dataset_name = f.split("/")[-1].removesuffix(".csv")
                         dict_data = {dataset_name: f"s3://{f}"}
                         datapoints = {**datapoints, **dict_data}
                 return datapoints

             # Check if the S3 URI is a csv file
-            if s3fs_obj.isfile(datapoint) and not datapoint.endswith(
-                raise Exception(f
-            dataset_name = datapoint.split(
+            if s3fs_obj.isfile(datapoint) and not datapoint.endswith(".csv"):
+                raise Exception(f"Invalid datapoint. S3 URI must refer to a csv file: {datapoint}")
+            dataset_name = datapoint.split("/")[-1].removesuffix(".csv")
             dict_data = {dataset_name: datapoint}
             return dict_data

         try:
             datapoint = Path(datapoint)
         except Exception:
-            raise Exception(
+            raise Exception("Invalid datapoint. Input must refer to a Path or an S3 URI")
     if datapoint.is_dir():
         datapoints = {}
         for f in datapoint.iterdir():
-            if f.suffix !=
+            if f.suffix != ".csv":
                 continue
             dp = _load_single_datapoint(f)
             datapoints = {**datapoints, **dp}
         dict_data = datapoints
     else:
-        dataset_name = datapoint.name.removesuffix(
-        dict_data = {dataset_name: datapoint}
+        dataset_name = datapoint.name.removesuffix(".csv")
+        dict_data = {dataset_name: datapoint}  # type: ignore[dict-item]
     return dict_data


-def _load_datapoints_path(
+def _load_datapoints_path(
+    datapoints: Union[Path, str, List[Union[str, Path]]]
+) -> Dict[str, Dataset]:
     """
     Returns a dict with the data given from a Path.
     """
     if isinstance(datapoints, list):
-        dict_datapoints = {}
+        dict_datapoints: Dict[str, Any] = {}
         for x in datapoints:
             result = _load_single_datapoint(x)
             dict_datapoints = {**dict_datapoints, **result}
@@ -114,40 +122,53 @@ def _load_datapoints_path(datapoints: Union[Path, str, List[Union[str, Path]]]):
     return _load_single_datapoint(datapoints)


-def _load_datastructure_single(data_structure: Union[
+def _load_datastructure_single(data_structure: Union[Dict[str, Any], Path]) -> Dict[str, Dataset]:
     """
     Loads a single data structure.
     """
     if isinstance(data_structure, dict):
         return _load_dataset_from_structure(data_structure)
     if not isinstance(data_structure, Path):
-        raise Exception(
+        raise Exception("Invalid datastructure. Input must be a dict or Path object")
     if not data_structure.exists():
-        raise Exception(
+        raise Exception("Invalid datastructure. Input does not exist")
     if data_structure.is_dir():
-        datasets = {}
+        datasets: Dict[str, Any] = {}
         for f in data_structure.iterdir():
-            if f.suffix !=
+            if f.suffix != ".json":
                 continue
             dataset = _load_datastructure_single(f)
             datasets = {**datasets, **dataset}
         return datasets
     else:
-        if data_structure.suffix !=
-            raise Exception(
-        with open(data_structure,
+        if data_structure.suffix != ".json":
+            raise Exception("Invalid datastructure. Must have .json extension")
+        with open(data_structure, "r") as file:
             structures = json.load(file)
         return _load_dataset_from_structure(structures)


-def load_datasets(
+def load_datasets(
+    data_structure: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]]
+) -> Dict[str, Dataset]:
     """
     Loads multiple datasets.
+
+    Args:
+        data_structure: Dict, Path or a List of dicts or Paths.
+
+    Returns:
+        The datastructure as a dict or a list of datastructures as dicts. \
+        These dicts will have as keys the name, role, \
+        type and nullable of the data contained in the dataset.
+
+    Raises:
+        Exception: If the Path is invalid or datastructure has a wrong format.
     """
     if isinstance(data_structure, dict):
         return _load_datastructure_single(data_structure)
     if isinstance(data_structure, list):
-        ds_structures = {}
+        ds_structures: Dict[str, Any] = {}
         for x in data_structure:
             result = _load_datastructure_single(x)
             ds_structures = {**ds_structures, **result}  # Overwrite ds_structures dict.
@@ -155,11 +176,19 @@ def load_datasets(data_structure: Union[dict, Path, List[Union[dict, Path]]]):
     return _load_datastructure_single(data_structure)


-def load_datasets_with_data(data_structures:
-                            datapoints: Optional[Union[dict, Path, List[Path]]] = None):
+def load_datasets_with_data(data_structures: Any, datapoints: Optional[Any] = None) -> Any:
     """
-    Loads the dataset structures and fills them with the data contained in the datapoints.
-
+    Loads the dataset structures and fills them with the data contained in the datapoints.
+
+    Args:
+        data_structures: Dict, Path or a List of dicts or Paths.
+        datapoints: Dict, Path or a List of Paths.
+
+    Returns:
+        A dict with the structure and a pandas dataframe with the data.
+
+    Raises:
+        Exception: If the Path is wrong or the file is invalid.
     """
     datasets = load_datasets(data_structures)
     if datapoints is None:
@@ -172,11 +201,14 @@ def load_datasets_with_data(data_structures: Union[dict, Path, List[Union[dict,
         for dataset_name, data in datapoints.items():
             if dataset_name not in datasets:
                 raise Exception(f"Not found dataset {dataset_name}")
-            datasets[dataset_name].data = _validate_pandas(
+            datasets[dataset_name].data = _validate_pandas(
+                datasets[dataset_name].components, data, dataset_name
+            )
         for dataset_name in datasets:
             if datasets[dataset_name].data is None:
                 datasets[dataset_name].data = pd.DataFrame(
-                    columns=list(datasets[dataset_name].components.keys())
+                    columns=list(datasets[dataset_name].components.keys())
+                )
         return datasets, None
     # Handling dictionary of paths
     dict_datapoints = _load_datapoints_path(datapoints)
@@ -187,69 +219,88 @@ def load_datasets_with_data(data_structures: Union[dict, Path, List[Union[dict,
     return datasets, dict_datapoints


-def load_vtl(input: Union[str, Path]):
+def load_vtl(input: Union[str, Path]) -> str:
     """
     Reads the vtl expression.

-    :
+    Args:
+        input: String or Path of the vtl expression.
+
+    Returns:
+        If it is a string, it will return the input as a string. \
+        If it is a Path, it will return the expression contained in the file as a string.

-    :
-
+    Raises:
+        Exception: If the vtl does not exist, if the Path is wrong, or if it is not a vtl file.
     """
     if isinstance(input, str):
-
+        if os.path.exists(input):
+            input = Path(input)
+        else:
+            return input
     if not isinstance(input, Path):
-        raise Exception(
+        raise Exception("Invalid vtl file. Input is not a Path object")
     if not input.exists():
-        raise Exception(
-    if input.suffix !=
-        raise Exception(
-    with open(input,
+        raise Exception("Invalid vtl file. Input does not exist")
+    if input.suffix != ".vtl":
+        raise Exception("Invalid vtl file. Must have .vtl extension")
+    with open(input, "r") as f:
         return f.read()


-def _load_single_value_domain(input: Path):
-    if input.suffix !=
-        raise Exception(
-    with open(input,
+def _load_single_value_domain(input: Path) -> Dict[str, ValueDomain]:
+    if input.suffix != ".json":
+        raise Exception("Invalid Value Domain file. Must have .json extension")
+    with open(input, "r") as f:
         vd = ValueDomain.from_dict(json.load(f))
     return {vd.name: vd}


-def load_value_domains(input: Union[
+def load_value_domains(input: Union[Dict[str, Any], Path]) -> Dict[str, ValueDomain]:
     """
     Loads the value domains.

-    :
+    Args:
+        input: Dict or Path of the json file that contains the value domains data.

-    :
+    Returns:
+        A dictionary with the value domains data, or a list of dictionaries with them.
+
+    Raises:
+        Exception: If the value domains file is wrong, the Path is invalid, \
+        or the value domains file does not exist.
     """
     if isinstance(input, dict):
         vd = ValueDomain.from_dict(input)
         return {vd.name: vd}
     if not isinstance(input, Path):
-        raise Exception(
+        raise Exception("Invalid vd file. Input is not a Path object")
     if not input.exists():
-        raise Exception(
+        raise Exception("Invalid vd file. Input does not exist")
     if input.is_dir():
-        value_domains = {}
+        value_domains: Dict[str, Any] = {}
         for f in input.iterdir():
             vd = _load_single_value_domain(f)
             value_domains = {**value_domains, **vd}
         return value_domains
-    if input.suffix !=
-        raise Exception(
+    if input.suffix != ".json":
+        raise Exception("Invalid vd file. Must have .json extension")
     return _load_single_value_domain(input)


-def load_external_routines(input: Union[
-        Dict[str, ExternalRoutine]]:
+def load_external_routines(input: Union[Dict[str, Any], Path, str]) -> Any:
     """
     Load the external routines.

-    :
+    Args:
+        input: Dict or Path of the sql file that contains the external routine data.
+
+    Returns:
+        A dictionary with the external routine data, or a list with \
+        the dictionaries from the Path given.

-    :
+    Raises:
+        Exception: If the sql file does not exist, the Path is wrong, or the file is not a sql one.
     """
     external_routines = {}
     if isinstance(input, dict):
@@ -258,12 +309,12 @@ def load_external_routines(input: Union[dict, Path]) -> Optional[
             external_routines[ext_routine.name] = ext_routine
         return external_routines
     if not isinstance(input, Path):
-        raise Exception(
+        raise Exception("Input invalid. Input must be a sql file.")
     if not input.exists():
-        raise Exception(
+        raise Exception("Input invalid. Input does not exist")
     if input.is_dir():
         for f in input.iterdir():
-            if f.suffix !=
+            if f.suffix != ".sql":
                 continue
             ext_rout = _load_single_external_routine_from_file(f)
             external_routines[ext_rout.name] = ext_rout
@@ -273,53 +324,59 @@ def load_external_routines(input: Union[dict, Path]) -> Optional[
     return external_routines


-def _return_only_persistent_datasets(
+def _return_only_persistent_datasets(
+    datasets: Dict[str, Dataset], ast: Start
+) -> Dict[str, Dataset]:
     """
     Returns only the datasets with a persistent assignment.
     """
     persistent = []
     for child in ast.children:
-        if isinstance(child, PersistentAssignment):
+        if isinstance(child, PersistentAssignment) and hasattr(child.left, "value"):
             persistent.append(child.left.value)
-    return {dataset.name: dataset for dataset in datasets.values() if
-            isinstance(dataset, Dataset) and dataset.name in persistent}
+    return {dataset.name: dataset for dataset in datasets.values() if dataset.name in persistent}


-def _load_single_external_routine_from_file(input: Path):
+def _load_single_external_routine_from_file(input: Path) -> Any:
     """
     Returns a single external routine.
     """
     if not isinstance(input, Path):
-        raise Exception(
+        raise Exception("Input invalid")
     if not input.exists():
-        raise Exception(
-    if input.suffix !=
-        raise Exception(
-    with open(input,
-        ext_rout = ExternalRoutine.from_sql_query(input.name.removesuffix(
+        raise Exception("Input does not exist")
+    if input.suffix != ".sql":
+        raise Exception("Input must be a sql file")
+    with open(input, "r") as f:
+        ext_rout = ExternalRoutine.from_sql_query(input.name.removesuffix(".sql"), f.read())
     return ext_rout


-def _check_output_folder(output_folder: Union[str, Path]):
+def _check_output_folder(output_folder: Union[str, Path]) -> None:
     """
     Check if the output folder exists. If not, it will create it.
     """
     if isinstance(output_folder, str):
-        if
+        if "s3://" in output_folder:
             s3fs_obj = S3FileSystem()
             # Check if the S3 URI is valid
             if not s3fs_obj.exists(output_folder):
                 try:
                     s3fs_obj.mkdir(output_folder)
                 except Exception:
-                    raise Exception(
+                    raise Exception(
+                        f"Invalid output folder. S3 URI is invalid or "
+                        f"it is not accessible: {output_folder}"
+                    )
             return
     try:
         output_folder = Path(output_folder)
     except Exception:
-        raise Exception(
+        raise Exception("Output folder must be a Path or S3 URI to a directory")

-    if not isinstance(output_folder, Path)
-        raise Exception(
+    if not isinstance(output_folder, Path):
+        raise Exception("Output folder must be a Path or S3 URI to a directory")
     if not output_folder.exists():
+        if output_folder.suffix != "":
+            raise Exception("Output folder must be a Path or S3 URI to a directory")
         os.mkdir(output_folder)