vtlengine-1.0-py3-none-any.whl → vtlengine-1.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +153 -100
- vtlengine/API/__init__.py +109 -67
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +8 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/lexer.py +732 -142
- vtlengine/AST/Grammar/parser.py +2188 -826
- vtlengine/AST/Grammar/tokens.py +128 -128
- vtlengine/AST/VtlVisitor.py +7 -4
- vtlengine/AST/__init__.py +22 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +194 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +52 -27
- vtlengine/Exceptions/messages.py +134 -62
- vtlengine/Interpreter/__init__.py +781 -487
- vtlengine/Model/__init__.py +165 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +115 -59
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +149 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +89 -44
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +334 -216
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +195 -40
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +77 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +46 -37
- vtlengine-1.0.1.dist-info/METADATA +236 -0
- vtlengine-1.0.1.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
vtlengine/API/_InternalApi.py
CHANGED
@@ -1,10 +1,10 @@
 import json
 import os
 from pathlib import Path
-from typing import Union, Optional, Dict, List
+from typing import Union, Optional, Dict, List, Any
 
 import pandas as pd
-from s3fs import S3FileSystem
+from s3fs import S3FileSystem  # type: ignore[import-untyped]
 
 from vtlengine.AST import PersistentAssignment, Start
 from vtlengine.DataTypes import SCALAR_TYPES
@@ -21,92 +21,96 @@ filepath_out_json = base_path / "data" / "DataStructure" / "output"
 filepath_out_csv = base_path / "data" / "DataSet" / "output"
 
 
-def _load_dataset_from_structure(structures: …
+def _load_dataset_from_structure(structures: Dict[str, Any]) -> Dict[str, Any]:
     """
     Loads a dataset with the structure given.
     """
     datasets = {}
 
-    if …
-        for dataset_json in structures[…
-            dataset_name = dataset_json[…
+    if "datasets" in structures:
+        for dataset_json in structures["datasets"]:
+            dataset_name = dataset_json["name"]
             components = {
-                component[…
-                …
+                component["name"]: Component(
+                    name=component["name"],
+                    data_type=SCALAR_TYPES[component["type"]],
+                    role=Role(component["role"]),
+                    nullable=component["nullable"],
+                )
+                for component in dataset_json["DataStructure"]
+            }
+
+            datasets[dataset_name] = Dataset(name=dataset_name, components=components, data=None)
+    if "scalars" in structures:
+        for scalar_json in structures["scalars"]:
+            scalar_name = scalar_json["name"]
+            scalar = Scalar(
+                name=scalar_name, data_type=SCALAR_TYPES[scalar_json["type"]], value=None
+            )
+            datasets[scalar_name] = scalar  # type: ignore[assignment]
     return datasets
 
 
-def _load_single_datapoint(datapoint: Union[str, Path]):
+def _load_single_datapoint(datapoint: Union[str, Path]) -> Dict[str, Any]:
     """
     Returns a dict with the data given from one dataset.
     """
     if not isinstance(datapoint, (Path, str)):
-        raise Exception(…
+        raise Exception("Invalid datapoint. Input must be a Path or an S3 URI")
     if isinstance(datapoint, str):
-        if …
+        if "s3://" in datapoint:
             # Handling S3 URI
             s3fs_obj = S3FileSystem()
 
             # Check if the S3 URI is valid
             if not s3fs_obj.exists(datapoint):
                 raise Exception(
-                    f…
+                    f"Invalid datapoint. S3 URI does not exist or it is not accessible: {datapoint}"
+                )
 
             # Check if the S3 URI is a directory
             if s3fs_obj.isdir(datapoint):
-                datapoints = {}
+                datapoints: Dict[str, Any] = {}
                 for f in s3fs_obj.ls(datapoint):
-                    if f.endswith(…
-                        dataset_name = f.split(…
+                    if f.endswith(".csv"):
+                        dataset_name = f.split("/")[-1].removesuffix(".csv")
                         dict_data = {dataset_name: f"s3://{f}"}
                         datapoints = {**datapoints, **dict_data}
                 return datapoints
 
             # Check if the S3 URI is a csv file
-            if s3fs_obj.isfile(datapoint) and not datapoint.endswith(…
-                raise Exception(f…
-            dataset_name = datapoint.split(…
+            if s3fs_obj.isfile(datapoint) and not datapoint.endswith(".csv"):
+                raise Exception(f"Invalid datapoint. S3 URI must refer to a csv file: {datapoint}")
+            dataset_name = datapoint.split("/")[-1].removesuffix(".csv")
             dict_data = {dataset_name: datapoint}
             return dict_data
 
         try:
             datapoint = Path(datapoint)
         except Exception:
-            raise Exception(…
+            raise Exception("Invalid datapoint. Input must refer to a Path or an S3 URI")
     if datapoint.is_dir():
         datapoints = {}
         for f in datapoint.iterdir():
-            if f.suffix != …
+            if f.suffix != ".csv":
                 continue
             dp = _load_single_datapoint(f)
             datapoints = {**datapoints, **dp}
         dict_data = datapoints
     else:
-        dataset_name = datapoint.name.removesuffix(…
-        dict_data = {dataset_name: datapoint}
+        dataset_name = datapoint.name.removesuffix(".csv")
+        dict_data = {dataset_name: datapoint}  # type: ignore[dict-item]
    return dict_data
 
 
-def _load_datapoints_path(…
+def _load_datapoints_path(
+    datapoints: Union[Path, str, List[Union[str, Path]]]
+) -> Dict[str, Dataset]:
     """
     Returns a dict with the data given from a Path.
     """
     if isinstance(datapoints, list):
-        dict_datapoints = {}
+        dict_datapoints: Dict[str, Any] = {}
         for x in datapoints:
             result = _load_single_datapoint(x)
             dict_datapoints = {**dict_datapoints, **result}
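To make the new typed signature concrete, here is a minimal sketch of the structure dictionary that _load_dataset_from_structure (and the public load_datasets shown further below) consumes in 1.0.1. The shape follows the keys read in the hunk above ("datasets", "name", "DataStructure", "type", "role", "nullable", "scalars"); the dataset name, component names and the specific type/role literals are illustrative assumptions, not values taken from this release.

    from vtlengine.API._InternalApi import _load_dataset_from_structure

    # Hypothetical structure dict; the keys mirror what the code above reads.
    structure = {
        "datasets": [
            {
                "name": "DS_1",
                "DataStructure": [
                    {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                    {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
                ],
            }
        ],
        "scalars": [
            {"name": "sc_1", "type": "Integer"},  # becomes a Scalar with value=None
        ],
    }

    datasets = _load_dataset_from_structure(structure)  # {"DS_1": Dataset(...), "sc_1": Scalar(...)}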
@@ -114,40 +118,53 @@ def _load_datapoints_path(datapoints: Union[Path, str, List[Union[str, Path]]]):
     return _load_single_datapoint(datapoints)
 
 
-def _load_datastructure_single(data_structure: Union[…
+def _load_datastructure_single(data_structure: Union[Dict[str, Any], Path]) -> Dict[str, Dataset]:
     """
     Loads a single data structure.
     """
     if isinstance(data_structure, dict):
         return _load_dataset_from_structure(data_structure)
     if not isinstance(data_structure, Path):
-        raise Exception(…
+        raise Exception("Invalid datastructure. Input must be a dict or Path object")
     if not data_structure.exists():
-        raise Exception(…
+        raise Exception("Invalid datastructure. Input does not exist")
     if data_structure.is_dir():
-        datasets = {}
+        datasets: Dict[str, Any] = {}
         for f in data_structure.iterdir():
-            if f.suffix != …
+            if f.suffix != ".json":
                 continue
             dataset = _load_datastructure_single(f)
             datasets = {**datasets, **dataset}
         return datasets
     else:
-        if data_structure.suffix != …
-            raise Exception(…
-        with open(data_structure, …
+        if data_structure.suffix != ".json":
+            raise Exception("Invalid datastructure. Must have .json extension")
+        with open(data_structure, "r") as file:
             structures = json.load(file)
         return _load_dataset_from_structure(structures)
 
 
-def load_datasets(…
+def load_datasets(
+    data_structure: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]]
+) -> Dict[str, Dataset]:
     """
     Loads multiple datasets.
+
+    Args:
+        data_structure: Dict, Path or a List of dicts or Paths.
+
+    Returns:
+        The datastructure as a dict or a list of datastructures as dicts. \
+        These dicts will have as keys the name, role, \
+        type and nullable of the data contained in the dataset.
+
+    Raises:
+        Exception: If the Path is invalid or datastructure has a wrong format.
     """
     if isinstance(data_structure, dict):
         return _load_datastructure_single(data_structure)
     if isinstance(data_structure, list):
-        ds_structures = {}
+        ds_structures: Dict[str, Any] = {}
         for x in data_structure:
             result = _load_datastructure_single(x)
             ds_structures = {**ds_structures, **result}  # Overwrite ds_structures dict.
@@ -155,11 +172,19 @@ def load_datasets(data_structure: Union[dict, Path, List[Union[dict, Path]]]):
     return _load_datastructure_single(data_structure)
 
 
-def load_datasets_with_data(data_structures: …
-                            datapoints: Optional[Union[dict, Path, List[Path]]] = None):
+def load_datasets_with_data(data_structures: Any, datapoints: Optional[Any] = None) -> Any:
     """
-    Loads the dataset structures and fills them with the data contained in the datapoints.
-    …
+    Loads the dataset structures and fills them with the data contained in the datapoints.
+
+    Args:
+        data_structures: Dict, Path or a List of dicts or Paths.
+        datapoints: Dict, Path or a List of Paths.
+
+    Returns:
+        A dict with the structure and a pandas dataframe with the data.
+
+    Raises:
+        Exception: If the Path is wrong or the file is invalid.
     """
     datasets = load_datasets(data_structures)
     if datapoints is None:
@@ -172,11 +197,14 @@ def load_datasets_with_data(data_structures: Union[dict, Path, List[Union[dict,
         for dataset_name, data in datapoints.items():
             if dataset_name not in datasets:
                 raise Exception(f"Not found dataset {dataset_name}")
-            datasets[dataset_name].data = _validate_pandas(…
+            datasets[dataset_name].data = _validate_pandas(
+                datasets[dataset_name].components, data, dataset_name
+            )
         for dataset_name in datasets:
             if datasets[dataset_name].data is None:
                 datasets[dataset_name].data = pd.DataFrame(
-                    columns=list(datasets[dataset_name].components.keys())…
+                    columns=list(datasets[dataset_name].components.keys())
+                )
         return datasets, None
     # Handling dictionary of paths
     dict_datapoints = _load_datapoints_path(datapoints)
@@ -187,69 +215,88 @@ def load_datasets_with_data(data_structures: Union[dict, Path, List[Union[dict,
     return datasets, dict_datapoints
 
 
-def load_vtl(input: Union[str, Path]):
+def load_vtl(input: Union[str, Path]) -> str:
     """
     Reads the vtl expression.
 
-    :…
+    Args:
+        input: String or Path of the vtl expression.
+
+    Returns:
+        If it is a string, it will return the input as a string. \
+        If it is a Path, it will return the expression contained in the file as a string.
 
-    :…
-    …
+    Raises:
+        Exception: If the vtl does not exist, if the Path is wrong, or if it is not a vtl file.
     """
     if isinstance(input, str):
-        …
+        if os.path.exists(input):
+            input = Path(input)
+        else:
+            return input
     if not isinstance(input, Path):
-        raise Exception(…
+        raise Exception("Invalid vtl file. Input is not a Path object")
     if not input.exists():
-        raise Exception(…
-    if input.suffix != …
-        raise Exception(…
-    with open(input, …
+        raise Exception("Invalid vtl file. Input does not exist")
+    if input.suffix != ".vtl":
+        raise Exception("Invalid vtl file. Must have .vtl extension")
+    with open(input, "r") as f:
         return f.read()
 
 
-def _load_single_value_domain(input: Path):
-    if input.suffix != …
-        raise Exception(…
-    with open(input, …
+def _load_single_value_domain(input: Path) -> Dict[str, ValueDomain]:
+    if input.suffix != ".json":
+        raise Exception("Invalid Value Domain file. Must have .json extension")
+    with open(input, "r") as f:
         vd = ValueDomain.from_dict(json.load(f))
     return {vd.name: vd}
 
 
-def load_value_domains(input: Union[…
+def load_value_domains(input: Union[Dict[str, Any], Path]) -> Dict[str, ValueDomain]:
     """
     Loads the value domains.
 
-    :…
+    Args:
+        input: Dict or Path of the json file that contains the value domains data.
 
-    :…
+    Returns:
+        A dictionary with the value domains data, or a list of dictionaries with them.
+
+    Raises:
+        Exception: If the value domains file is wrong, the Path is invalid, \
+        or the value domains file does not exist.
     """
     if isinstance(input, dict):
         vd = ValueDomain.from_dict(input)
         return {vd.name: vd}
     if not isinstance(input, Path):
-        raise Exception(…
+        raise Exception("Invalid vd file. Input is not a Path object")
     if not input.exists():
-        raise Exception(…
+        raise Exception("Invalid vd file. Input does not exist")
     if input.is_dir():
-        value_domains = {}
+        value_domains: Dict[str, Any] = {}
         for f in input.iterdir():
             vd = _load_single_value_domain(f)
             value_domains = {**value_domains, **vd}
         return value_domains
-    if input.suffix != …
-        raise Exception(…
+    if input.suffix != ".json":
+        raise Exception("Invalid vd file. Must have .json extension")
     return _load_single_value_domain(input)
 
 
-def load_external_routines(input: Union[…
-                           Dict[str, ExternalRoutine]]:
+def load_external_routines(input: Union[Dict[str, Any], Path, str]) -> Any:
     """
     Load the external routines.
 
-    :…
+    Args:
+        input: Dict or Path of the sql file that contains the external routine data.
+
+    Returns:
+        A dictionary with the external routine data, or a list with \
+        the dictionaries from the Path given.
 
-    :…
+    Raises:
+        Exception: If the sql file does not exist, the Path is wrong, or the file is not a sql one.
     """
     external_routines = {}
     if isinstance(input, dict):
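A short usage sketch of the loaders updated in this hunk, reflecting the 1.0.1 behaviour shown above: load_vtl now returns a plain string unchanged unless it points at an existing .vtl file, and the value-domain and external-routine loaders accept either a single file or a directory. The file names are placeholders, and these helpers are internal ones that are normally reached through the public vtlengine.API functions.

    from pathlib import Path
    from vtlengine.API._InternalApi import load_vtl, load_value_domains, load_external_routines

    script = load_vtl("DS_r := DS_1 + 1;")                    # not an existing path -> returned as-is
    script_from_file = load_vtl(Path("transformation.vtl"))   # must exist and end in .vtl

    value_domains = load_value_domains(Path("value_domains"))        # directory of .json files
    routines = load_external_routines(Path("routines/filter.sql"))   # single .sql file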
@@ -258,12 +305,12 @@ def load_external_routines(input: Union[dict, Path]) -> Optional[
             external_routines[ext_routine.name] = ext_routine
         return external_routines
     if not isinstance(input, Path):
-        raise Exception(…
+        raise Exception("Input invalid. Input must be a sql file.")
     if not input.exists():
-        raise Exception(…
+        raise Exception("Input invalid. Input does not exist")
     if input.is_dir():
         for f in input.iterdir():
-            if f.suffix != …
+            if f.suffix != ".sql":
                 continue
             ext_rout = _load_single_external_routine_from_file(f)
             external_routines[ext_rout.name] = ext_rout
@@ -273,53 +320,59 @@ def load_external_routines(input: Union[dict, Path]) -> Optional[
     return external_routines
 
 
-def _return_only_persistent_datasets(…
+def _return_only_persistent_datasets(
+    datasets: Dict[str, Dataset], ast: Start
+) -> Dict[str, Dataset]:
     """
     Returns only the datasets with a persistent assignment.
     """
     persistent = []
     for child in ast.children:
-        if isinstance(child, PersistentAssignment):
+        if isinstance(child, PersistentAssignment) and hasattr(child.left, "value"):
             persistent.append(child.left.value)
-    return {dataset.name: dataset for dataset in datasets.values() if …
-            isinstance(dataset, Dataset) and dataset.name in persistent}
+    return {dataset.name: dataset for dataset in datasets.values() if dataset.name in persistent}
 
 
-def _load_single_external_routine_from_file(input: Path):
+def _load_single_external_routine_from_file(input: Path) -> Any:
     """
     Returns a single external routine.
     """
     if not isinstance(input, Path):
-        raise Exception(…
+        raise Exception("Input invalid")
     if not input.exists():
-        raise Exception(…
-    if input.suffix != …
-        raise Exception(…
-    with open(input, …
-        ext_rout = ExternalRoutine.from_sql_query(input.name.removesuffix(…
+        raise Exception("Input does not exist")
+    if input.suffix != ".sql":
+        raise Exception("Input must be a sql file")
+    with open(input, "r") as f:
+        ext_rout = ExternalRoutine.from_sql_query(input.name.removesuffix(".sql"), f.read())
     return ext_rout
 
 
-def _check_output_folder(output_folder: Union[str, Path]):
+def _check_output_folder(output_folder: Union[str, Path]) -> None:
     """
     Check if the output folder exists. If not, it will create it.
     """
     if isinstance(output_folder, str):
-        if …
+        if "s3://" in output_folder:
             s3fs_obj = S3FileSystem()
             # Check if the S3 URI is valid
             if not s3fs_obj.exists(output_folder):
                 try:
                     s3fs_obj.mkdir(output_folder)
                 except Exception:
-                    raise Exception(…
+                    raise Exception(
+                        f"Invalid output folder. S3 URI is invalid or "
+                        f"it is not accessible: {output_folder}"
+                    )
             return
         try:
             output_folder = Path(output_folder)
         except Exception:
-            raise Exception(…
+            raise Exception("Output folder must be a Path or S3 URI to a directory")
 
-    if not isinstance(output_folder, Path)…
-        raise Exception(…
+    if not isinstance(output_folder, Path):
+        raise Exception("Output folder must be a Path or S3 URI to a directory")
     if not output_folder.exists():
+        if output_folder.suffix != "":
+            raise Exception("Output folder must be a Path or S3 URI to a directory")
         os.mkdir(output_folder)