vtlengine-1.1rc2-py3-none-any.whl → vtlengine-1.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +288 -29
- vtlengine/API/__init__.py +277 -70
- vtlengine/AST/ASTComment.py +56 -0
- vtlengine/AST/ASTConstructor.py +71 -18
- vtlengine/AST/ASTConstructorModules/Expr.py +197 -75
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +81 -38
- vtlengine/AST/ASTConstructorModules/Terminals.py +76 -31
- vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- vtlengine/AST/ASTEncoders.py +4 -0
- vtlengine/AST/ASTString.py +622 -0
- vtlengine/AST/ASTTemplate.py +28 -2
- vtlengine/AST/DAG/__init__.py +44 -6
- vtlengine/AST/DAG/_words.py +1 -0
- vtlengine/AST/Grammar/Vtl.g4 +7 -7
- vtlengine/AST/Grammar/lexer.py +19759 -1112
- vtlengine/AST/Grammar/parser.py +17996 -3199
- vtlengine/AST/__init__.py +127 -14
- vtlengine/Exceptions/messages.py +14 -2
- vtlengine/Interpreter/__init__.py +90 -11
- vtlengine/Model/__init__.py +9 -4
- vtlengine/Operators/Aggregation.py +13 -6
- vtlengine/Operators/Analytic.py +19 -13
- vtlengine/Operators/CastOperator.py +5 -2
- vtlengine/Operators/Clause.py +26 -18
- vtlengine/Operators/Comparison.py +3 -1
- vtlengine/Operators/Conditional.py +40 -18
- vtlengine/Operators/General.py +3 -1
- vtlengine/Operators/HROperators.py +3 -1
- vtlengine/Operators/Join.py +4 -2
- vtlengine/Operators/Time.py +22 -15
- vtlengine/Operators/Validation.py +5 -2
- vtlengine/Operators/__init__.py +15 -8
- vtlengine/Utils/__Virtual_Assets.py +34 -0
- vtlengine/Utils/__init__.py +49 -0
- vtlengine/__init__.py +4 -2
- vtlengine/files/parser/__init__.py +16 -26
- vtlengine/files/parser/_rfc_dialect.py +1 -1
- vtlengine/py.typed +0 -0
- vtlengine-1.2.0.dist-info/METADATA +92 -0
- vtlengine-1.2.0.dist-info/RECORD +63 -0
- {vtlengine-1.1rc2.dist-info → vtlengine-1.2.0.dist-info}/WHEEL +1 -1
- vtlengine-1.1rc2.dist-info/METADATA +0 -248
- vtlengine-1.1rc2.dist-info/RECORD +0 -59
- {vtlengine-1.1rc2.dist-info → vtlengine-1.2.0.dist-info}/LICENSE.md +0 -0
vtlengine/API/__init__.py
CHANGED
@@ -1,29 +1,41 @@
+import warnings
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Sequence, Union
 
 import pandas as pd
 from antlr4 import CommonTokenStream, InputStream  # type: ignore[import-untyped]
 from antlr4.error.ErrorListener import ErrorListener  # type: ignore[import-untyped]
+from pysdmx.io.pd import PandasDataset
+from pysdmx.model import DataflowRef, Reference, TransformationScheme
+from pysdmx.model.dataflow import Dataflow, Schema
+from pysdmx.model.vtl import VtlDataflowMapping
+from pysdmx.util import parse_urn
 
 from vtlengine.API._InternalApi import (
     _check_output_folder,
+    _check_script,
     _return_only_persistent_datasets,
+    ast_to_sdmx,
     load_datasets,
     load_datasets_with_data,
     load_external_routines,
     load_value_domains,
     load_vtl,
+    to_vtl_json,
 )
 from vtlengine.AST import Start
 from vtlengine.AST.ASTConstructor import ASTVisitor
+from vtlengine.AST.ASTString import ASTString
 from vtlengine.AST.DAG import DAGAnalyzer
 from vtlengine.AST.Grammar.lexer import Lexer
 from vtlengine.AST.Grammar.parser import Parser
+from vtlengine.Exceptions import SemanticError
 from vtlengine.files.output._time_period_representation import (
     TimePeriodRepresentation,
     format_time_period_external_representation,
 )
 from vtlengine.Interpreter import InterpreterAnalyzer
+from vtlengine.Model import Dataset, Scalar
 
 pd.options.mode.chained_assignment = None
 
@@ -68,6 +80,38 @@ def _parser(stream: CommonTokenStream) -> Any:
     return vtl_parser.start()
 
 
+def _extract_input_datasets(script: Union[str, TransformationScheme, Path]) -> str:
+    if isinstance(script, TransformationScheme):
+        vtl_script = _check_script(script)
+    elif isinstance(script, (str, Path)):
+        vtl_script = load_vtl(script)
+    else:
+        raise TypeError("Unsupported script type.")
+
+    ast = create_ast(vtl_script)
+    dag_inputs = DAGAnalyzer.ds_structure(ast)["global_inputs"]
+
+    return dag_inputs
+
+
+def prettify(script: Union[str, TransformationScheme, Path]) -> str:
+    """
+    Function that prettifies the VTL script given.
+
+    Args:
+        script: VTL script as a string, a Transformation Scheme object or Path with the VTL script.
+
+    Returns:
+        A str with the prettified VTL script.
+    """
+    from vtlengine.AST.ASTComment import create_ast_with_comments
+
+    checking = _check_script(script)
+    vtl = load_vtl(checking)
+    ast = create_ast_with_comments(vtl)
+    return ASTString(pretty=True).render(ast)
+
+
 def create_ast(text: str) -> Start:
     """
     Function that creates the AST object.
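The new `prettify` helper above re-renders a script from an AST that retains comments (see the `ASTComment` module added later in this diff). A minimal usage sketch; the script literal and the exact output formatting are illustrative:

from vtlengine.API import prettify

# A cramped one-liner: prettify reparses it (comments included, via
# create_ast_with_comments) and re-renders it with ASTString(pretty=True).
script = "DS_r:=DS_1+DS_2; /* total */ DS_r2<-DS_r*2;"
print(prettify(script))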
@@ -90,30 +134,19 @@ def create_ast(text: str) -> Start:
 
 
 def semantic_analysis(
-    script: Union[str, Path],
-    data_structures: Union[Dict[str, Any], Path, List[
+    script: Union[str, TransformationScheme, Path],
+    data_structures: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
     value_domains: Optional[Union[Dict[str, Any], Path]] = None,
     external_routines: Optional[Union[Dict[str, Any], Path]] = None,
-) ->
+) -> Dict[str, Dataset]:
     """
-    Checks if the vtl
-
+    Checks if the vtl scripts and its related datastructures are valid. As part of the compatibility
+    with pysdmx library, the vtl script can be a Transformation Scheme object, which availability as
+    input is going to be serialized as a string VTL script.
 
-
-    that contains the vtl file.
-
-    Moreover, the data structure can be a dictionary or a filepath to the folder that contains it.
-
-    If there are any value domains or external routines, this data is taken into account.
-    Both can be loaded the same way as data structures or vtl scripts are.
-
-    Finally, the :obj:`Interpreter <vtl-engine-spark.Interpreter.InterpreterAnalyzer>`
-    class takes all of this information and checks it with the ast generated to
-    return the semantic analysis result.
-
-    Concepts you may know:
+    Concepts you may need to know:
 
-    - Vtl script: The
+    - Vtl script: The script that shows the set of operations to be executed.
 
     - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
     (and/or scalar) about the datatype (String, integer or number), \
@@ -126,7 +159,8 @@ def semantic_analysis(
     This function has the following params:
 
     Args:
-        script:
+        script: Vtl script as a string, Transformation Scheme object or Path to the folder \
+        that holds the vtl script.
         data_structures: Dict or Path (file or folder), \
         or List of Dicts or Paths with the data structures JSON files.
         value_domains: Dict or Path of the value domains JSON files. (default: None)
@@ -139,12 +173,14 @@ def semantic_analysis(
         Exception: If the files have the wrong format, or they do not exist, \
         or their Paths are invalid.
     """
+
     # AST generation
-
+    checking = _check_script(script)
+    vtl = load_vtl(checking)
     ast = create_ast(vtl)
 
     # Loading datasets
-
+    datasets, scalars = load_datasets(data_structures)
 
     # Handling of library items
     vd = None
@@ -156,9 +192,10 @@ def semantic_analysis(
 
     # Running the interpreter
     interpreter = InterpreterAnalyzer(
-        datasets=
+        datasets=datasets,
         value_domains=vd,
         external_routines=ext_routines,
+        scalars=scalars,
         only_semantic=True,
     )
     result = interpreter.visit(ast)
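As the hunks above show, `semantic_analysis` now also accepts a pysdmx `TransformationScheme` and tracks scalars alongside datasets. A sketch of a structure-only validation call; the inline data-structure dict follows the engine's VTL JSON convention, and its field values are illustrative:

from vtlengine.API import semantic_analysis

# Illustrative data structure: one dataset with an identifier and a measure.
data_structures = {
    "datasets": [
        {
            "name": "DS_1",
            "DataStructure": [
                {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
            ],
        }
    ]
}

# No datapoints are needed: only the structures are checked.
result = semantic_analysis(script="DS_r := DS_1 * 2;", data_structures=data_structures)
print(result)  # e.g. {"DS_r": Dataset(...)} with a computed structure and no data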
@@ -166,30 +203,35 @@
 
 
 def run(
-    script: Union[str, Path],
-    data_structures: Union[Dict[str, Any], Path, List[
-    datapoints: Union[Dict[str,
+    script: Union[str, TransformationScheme, Path],
+    data_structures: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
+    datapoints: Union[Dict[str, pd.DataFrame], str, Path, List[Dict[str, Any]], List[Path]],
     value_domains: Optional[Union[Dict[str, Any], Path]] = None,
     external_routines: Optional[Union[str, Path]] = None,
     time_period_output_format: str = "vtl",
-    return_only_persistent: bool =
+    return_only_persistent: bool = True,
     output_folder: Optional[Union[str, Path]] = None,
-
+    scalar_values: Optional[Dict[str, Optional[Union[int, str, bool, float]]]] = None,
+) -> Dict[str, Union[Dataset, Scalar]]:
     """
-    Run is the main function of the ``API``, which mission is to
-
-
-
-
+    Run is the main function of the ``API``, which mission is to execute
+    the vtl operation over the data.
+
+    Concepts you may need to know:
+
+    - Vtl script: The script that shows the set of operations to be executed.
+
+    - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
+    (and/or scalar) about the datatype (String, integer or number), \
+    the role (Identifier, Attribute or Measure) and the nullability each component has.
 
-
-
-
-    It can be a dictionary or a path to the JSON file or folder that contains it.
+    - Data point: `Pandas Dataframe \
+    <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ \
+    that holds the data related to the Dataset.
 
-
-
-
+    - Value domains: Collection of unique values on the same datatype.
+
+    - External routines: SQL query used to transform a dataset.
 
     .. important::
         The data structure and the data points must have the same dataset
@@ -212,35 +254,12 @@ def run(
         For more details, see
         `s3fs documentation <https://s3fs.readthedocs.io/en/latest/index.html#credentials>`_.
 
-    Before the execution, the DAG analysis reviews if the VTL script is a direct acyclic
-
-
-    If value domain data or external routines are required, the function loads this information
-    and integrates them into the
-    :obj:`Interpreter <vtl-engine-spark.Interpreter.InterpreterAnalyzer>` class.
-
-    Moreover, if any component has a Time Period component, the external representation
-    is passed to the Interpreter class.
-
-    Concepts you may need to know:
-
-    - Vtl script: The expression that shows the operation to be done.
-
-    - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
-    (and/or scalar) about the datatype (String, integer or number), \
-    the role (Identifier, Attribute or Measure) and the nullability each component has.
-
-    - Data point: Pointer to the data. It will be loaded as a `Pandas Dataframe \
-    <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
-
-    - Value domains: Collection of unique values that have the same datatype.
-
-    - External routines: SQL query used to transform a dataset.
+    Before the execution, the DAG analysis reviews if the VTL script is a direct acyclic graph.
 
     This function has the following params:
 
     Args:
-        script:
+        script: VTL script as a string, a Transformation Scheme object or Path with the VTL script.
 
         data_structures: Dict, Path or a List of Dicts or Paths with the data structures.
 
@@ -255,10 +274,12 @@ def run(
         Time Period components.
 
         return_only_persistent: If True, run function will only return the results of \
-        Persistent Assignments. (default:
+        Persistent Assignments. (default: True)
 
         output_folder: Path or S3 URI to the output folder. (default: None)
 
+        scalar_values: Dict with the scalar values to be used in the VTL script. \
+
 
     Returns:
         The datasets are produced without data if the output folder is defined.
@@ -268,12 +289,16 @@ def run(
         or their Paths are invalid.
 
     """
+
     # AST generation
+    script = _check_script(script)
     vtl = load_vtl(script)
     ast = create_ast(vtl)
 
     # Loading datasets and datapoints
-    datasets, path_dict = load_datasets_with_data(
+    datasets, scalars, path_dict = load_datasets_with_data(
+        data_structures, datapoints, scalar_values
+    )
 
     # Handling of library items
     vd = None
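With the changes above, `run` now unpacks scalars from the data structures and accepts the new `scalar_values` argument to feed them. A hedged sketch of the plain (non-SDMX) execution path, reusing the same illustrative structure layout:

import pandas as pd

from vtlengine.API import run

data_structures = {
    "datasets": [
        {
            "name": "DS_1",
            "DataStructure": [
                {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
            ],
        }
    ]
}
# Datapoints are passed as pandas DataFrames keyed by dataset name.
datapoints = {"DS_1": pd.DataFrame({"Id_1": [1, 2], "Me_1": [10.0, 20.0]})}

result = run(
    script="DS_r <- DS_1 * 2;",  # persistent assignment, returned by default
    data_structures=data_structures,
    datapoints=datapoints,
    # scalar_values={"sc_1": 5},  # only for scalars declared in data_structures
)
print(result["DS_r"].data)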
@@ -302,15 +327,197 @@ def run(
         datapoints_paths=path_dict,
         output_path=output_folder,
         time_period_representation=time_period_representation,
+        return_only_persistent=return_only_persistent,
+        scalars=scalars,
     )
     result = interpreter.visit(ast)
 
     # Applying time period output format
     if output_folder is None:
-        for
-
+        for obj in result.values():
+            if isinstance(obj, (Dataset, Scalar)):
+                format_time_period_external_representation(obj, time_period_representation)
 
     # Returning only persistent datasets
     if return_only_persistent:
         return _return_only_persistent_datasets(result, ast)
     return result
+
+
+def run_sdmx(  # noqa: C901
+    script: Union[str, TransformationScheme, Path],
+    datasets: Sequence[PandasDataset],
+    mappings: Optional[Union[VtlDataflowMapping, Dict[str, str]]] = None,
+    value_domains: Optional[Union[Dict[str, Any], Path]] = None,
+    external_routines: Optional[Union[str, Path]] = None,
+    time_period_output_format: str = "vtl",
+    return_only_persistent: bool = True,
+    output_folder: Optional[Union[str, Path]] = None,
+) -> Dict[str, Union[Dataset, Scalar]]:
+    """
+    Executes a VTL script using a list of pysdmx `PandasDataset` objects.
+
+    This function prepares the required VTL data structures and datapoints from
+    the given list of pysdmx `PandasDataset` objects. It validates each
+    `PandasDataset` uses a valid `Schema` instance as its structure. Each `Schema` is converted
+    to the appropriate VTL JSON data structure, and the Pandas Dataframe is extracted.
+
+    .. important::
+        We recommend to use this function in combination with the
+        `get_datasets <https://py.sdmx.io/howto/data_rw.html#pysdmx.io.get_datasets>`_
+        pysdmx method.
+
+    .. important::
+        The mapping between pysdmx `PandasDataset
+        <https://py.sdmx.io/howto/data_rw.html#pysdmx.io.pd.PandasDataset>`_ \
+        and VTL datasets is done using the `Schema` instance of the `PandasDataset`.
+        The Schema ID is used as the dataset name.
+
+        DataStructure=MD:TEST_DS(1.0) -> TEST_DS
+
+    The function then calls the :obj:`run <vtlengine.API>` function with the provided VTL
+    script and prepared inputs.
+
+    Before the execution, the DAG analysis reviews if the generated VTL script is a direct acyclic
+    graph.
+
+    Args:
+        script: VTL script as a string, a Transformation Scheme object or Path with the VTL script.
+
+        datasets: A list of PandasDataset.
+
+        mappings: A dictionary or VtlDataflowMapping object that maps the dataset names.
+
+        value_domains: Dict or Path of the value domains JSON files. (default: None)
+
+        external_routines: String or Path of the external routines SQL files. (default: None)
+
+        time_period_output_format: String with the possible values \
+        ("sdmx_gregorian", "sdmx_reporting", "vtl") for the representation of the \
+        Time Period components.
+
+        return_only_persistent: If True, run function will only return the results of \
+        Persistent Assignments. (default: True)
+
+        output_folder: Path or S3 URI to the output folder. (default: None)
+
+    Returns:
+        The datasets are produced without data if the output folder is defined.
+
+    Raises:
+        SemanticError: If any dataset does not contain a valid `Schema` instance as its structure.
+
+    """
+    mapping_dict = {}
+    input_names = _extract_input_datasets(script)
+
+    if not isinstance(datasets, (list, set)) or any(
+        not isinstance(ds, PandasDataset) for ds in datasets
+    ):
+        type_ = type(datasets).__name__
+        if isinstance(datasets, (list, set)):
+            object_typing = {type(o).__name__ for o in datasets}
+            type_ = f"{type_}[{', '.join(object_typing)}]"
+        raise SemanticError("0-1-3-7", type_=type_)
+
+    # Mapping handling
+    if mappings is None:
+        if len(datasets) != 1:
+            raise SemanticError("0-1-3-3")
+        if len(datasets) == 1:
+            if len(input_names) != 1:
+                raise SemanticError("0-1-3-1", number_datasets=len(input_names))
+            schema = datasets[0].structure
+            if not isinstance(schema, Schema):
+                raise SemanticError("0-1-3-2", schema=schema)
+            mapping_dict = {schema.short_urn: input_names[0]}
+    elif isinstance(mappings, Dict):
+        mapping_dict = mappings
+    elif isinstance(mappings, VtlDataflowMapping):
+        if mappings.to_vtl_mapping_method is not None:
+            warnings.warn(
+                "To_vtl_mapping_method is not implemented yet, we will use the Basic "
+                "method with old data."
+            )
+        if mappings.from_vtl_mapping_method is not None:
+            warnings.warn(
+                "From_vtl_mapping_method is not implemented yet, we will use the Basic "
+                "method with old data."
+            )
+        if isinstance(mappings.dataflow, str):
+            short_urn = str(parse_urn(mappings.dataflow))
+        elif isinstance(mappings.dataflow, (Reference, DataflowRef)):
+            short_urn = str(mappings.dataflow)
+        elif isinstance(mappings.dataflow, Dataflow):
+            short_urn = mappings.dataflow.short_urn
+        else:
+            raise TypeError(
+                "Expected str, Reference, DataflowRef or Dataflow type for dataflow in "
+                "VtlDataflowMapping."
+            )
+
+        mapping_dict = {short_urn: mappings.dataflow_alias}
+    else:
+        raise TypeError("Expected dict or VtlDataflowMapping type for mappings.")
+
+    for vtl_name in mapping_dict.values():
+        if vtl_name not in input_names:
+            raise SemanticError("0-1-3-5", dataset_name=vtl_name)
+
+    datapoints = {}
+    data_structures = []
+    for dataset in datasets:
+        schema = dataset.structure
+        if not isinstance(schema, Schema):
+            raise SemanticError("0-1-3-2", schema=schema)
+        if schema.short_urn not in mapping_dict:
+            raise SemanticError("0-1-3-4", short_urn=schema.short_urn)
+        # Generating VTL Datastructure and Datapoints.
+        dataset_name = mapping_dict[schema.short_urn]
+        vtl_structure = to_vtl_json(schema, dataset_name)
+        data_structures.append(vtl_structure)
+        datapoints[dataset_name] = dataset.data
+
+    missing = []
+    for input_name in input_names:
+        if input_name not in mapping_dict.values():
+            missing.append(input_name)
+    if missing:
+        raise SemanticError("0-1-3-6", missing=missing)
+
+    result = run(
+        script=script,
+        data_structures=data_structures,
+        datapoints=datapoints,
+        value_domains=value_domains,
+        external_routines=external_routines,
+        time_period_output_format=time_period_output_format,
+        return_only_persistent=return_only_persistent,
+        output_folder=output_folder,
+    )
+    return result
+
+
+def generate_sdmx(
+    script: Union[str, Path], agency_id: str, id: str, version: str = "1.0"
+) -> TransformationScheme:
+    """
+    Function that generates a TransformationScheme object from a VTL script.
+
+    The TransformationScheme object is the SDMX representation of the VTL script. \
+    For more details please check the `SDMX IM VTL objects \
+    <https://sdmx.org/wp-content/uploads/SDMX_3-0-0_SECTION_2_FINAL-1_0.pdf#page=146>`_, line 2266.
+
+    Args:
+        script: A string with the VTL script.
+        agency_id: The Agency ID used in the generated `TransformationScheme` object.
+        id: The given id of the generated `TransformationScheme` object.
+        version: The Version used in the generated `TransformationScheme` object. (default: "1.0")
+
+    Returns:
+        The generated Transformation Scheme object.
+    """
+    vtl = load_vtl(script)
+    ast = create_ast(vtl)
+    result = ast_to_sdmx(ast, agency_id, id, version)
+    return result
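Together, `run_sdmx` and `generate_sdmx` close the pysdmx round trip: SDMX datasets in, VTL results out, and the script itself exportable as an SDMX `TransformationScheme`. A sketch assuming the recommended `pysdmx.io.get_datasets` reader and hypothetical input files; with a single dataset and a single-input script, no explicit mapping is required:

from pysdmx.io import get_datasets

from vtlengine.API import generate_sdmx, run_sdmx

# Hypothetical SDMX-ML data and structure files.
datasets = get_datasets("data.xml", "structure.xml")

# The single input dataset is mapped to the script's only input name.
script = "DS_r <- TEST_DS * 2;"
result = run_sdmx(script, datasets)

# The same script wrapped as a TransformationScheme is also accepted.
scheme = generate_sdmx(script, agency_id="MD", id="TS1", version="1.0")
result = run_sdmx(scheme, datasets)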
vtlengine/AST/ASTComment.py
ADDED
@@ -0,0 +1,56 @@
+from antlr4 import CommonTokenStream, InputStream
+from antlr4.Token import CommonToken
+
+from vtlengine.API import create_ast
+from vtlengine.AST import Comment, Start
+from vtlengine.AST.ASTConstructorModules import extract_token_info
+from vtlengine.AST.Grammar.lexer import Lexer
+
+
+def generate_ast_comment(token: CommonToken) -> Comment:
+    """
+    Parses a token belonging to a comment and returns a Comment AST object.
+
+    Args:
+        token (str): The comment string to parse.
+
+    Returns:
+        Comment: A Comment AST object.
+    """
+    token_info = extract_token_info(token)
+    text = token.text
+    if token.type == Lexer.SL_COMMENT:
+        text = token.text[:-1]  # Remove the trailing newline character
+    return Comment(value=text, **token_info)
+
+
+def create_ast_with_comments(text: str) -> Start:
+    """
+    Parses a VTL script and returns an AST with comments.
+
+    Args:
+        text (str): The VTL script to parse.
+
+    Returns:
+        AST: The generated AST with comments.
+    """
+    # Call the create_ast function to generate the AST from channel 0
+    ast = create_ast(text)
+
+    # Reading the script on channel 2 to get the comments
+    lexer_ = Lexer(InputStream(text))
+    stream = CommonTokenStream(lexer_, channel=2)
+
+    # Fill the stream with tokens on the buffer
+    stream.fill()
+
+    # Extract comments from the stream
+    comments = [generate_ast_comment(token) for token in stream.tokens if token.channel == 2]
+
+    # Add comments to the AST
+    ast.children.extend(comments)
+
+    # Sort the ast children based on their start line and column
+    ast.children.sort(key=lambda x: (x.line_start, x.column_start))
+
+    return ast
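The module works because the VTL lexer routes comment tokens to channel 2: `create_ast_with_comments` parses the statements normally, then refills a token stream on the comment channel and splices the resulting `Comment` nodes back in by position. A small usage sketch; attribute names follow the code above:

from vtlengine.AST import Comment
from vtlengine.AST.ASTComment import create_ast_with_comments

script = """/* doubles the input */
DS_r := DS_1 * 2; // inline note
"""

ast = create_ast_with_comments(script)

# Comment nodes sit interleaved with statements, ordered by line/column.
for child in ast.children:
    if isinstance(child, Comment):
        print(child.value)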