vtlengine 1.0.4__py3-none-any.whl → 1.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +236 -44
- vtlengine/API/__init__.py +258 -69
- vtlengine/AST/ASTComment.py +56 -0
- vtlengine/AST/ASTConstructor.py +71 -18
- vtlengine/AST/ASTConstructorModules/Expr.py +191 -72
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +81 -38
- vtlengine/AST/ASTConstructorModules/Terminals.py +76 -31
- vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- vtlengine/AST/ASTEncoders.py +4 -0
- vtlengine/AST/ASTString.py +608 -0
- vtlengine/AST/ASTTemplate.py +28 -2
- vtlengine/AST/DAG/__init__.py +10 -1
- vtlengine/AST/__init__.py +127 -14
- vtlengine/Exceptions/messages.py +9 -0
- vtlengine/Interpreter/__init__.py +53 -8
- vtlengine/Model/__init__.py +9 -4
- vtlengine/Operators/Aggregation.py +7 -5
- vtlengine/Operators/Analytic.py +16 -11
- vtlengine/Operators/Conditional.py +18 -3
- vtlengine/Operators/Time.py +9 -8
- vtlengine/Utils/__init__.py +49 -0
- vtlengine/__extras_check.py +17 -0
- vtlengine/__init__.py +2 -2
- vtlengine/files/output/__init__.py +2 -0
- vtlengine/files/parser/__init__.py +19 -47
- vtlengine/files/parser/_rfc_dialect.py +1 -1
- {vtlengine-1.0.4.dist-info → vtlengine-1.1.dist-info}/METADATA +20 -17
- {vtlengine-1.0.4.dist-info → vtlengine-1.1.dist-info}/RECORD +30 -27
- {vtlengine-1.0.4.dist-info → vtlengine-1.1.dist-info}/WHEEL +1 -1
- {vtlengine-1.0.4.dist-info → vtlengine-1.1.dist-info}/LICENSE.md +0 -0
vtlengine/API/__init__.py
CHANGED
@@ -1,29 +1,41 @@
+import warnings
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Sequence, Union

 import pandas as pd
 from antlr4 import CommonTokenStream, InputStream  # type: ignore[import-untyped]
 from antlr4.error.ErrorListener import ErrorListener  # type: ignore[import-untyped]
+from pysdmx.io.pd import PandasDataset
+from pysdmx.model import DataflowRef, Reference, TransformationScheme
+from pysdmx.model.dataflow import Dataflow, Schema
+from pysdmx.model.vtl import VtlDataflowMapping
+from pysdmx.util import parse_urn

 from vtlengine.API._InternalApi import (
     _check_output_folder,
+    _check_script,
     _return_only_persistent_datasets,
+    ast_to_sdmx,
     load_datasets,
     load_datasets_with_data,
     load_external_routines,
     load_value_domains,
     load_vtl,
+    to_vtl_json,
 )
 from vtlengine.AST import Start
 from vtlengine.AST.ASTConstructor import ASTVisitor
+from vtlengine.AST.ASTString import ASTString
 from vtlengine.AST.DAG import DAGAnalyzer
 from vtlengine.AST.Grammar.lexer import Lexer
 from vtlengine.AST.Grammar.parser import Parser
+from vtlengine.Exceptions import SemanticError
 from vtlengine.files.output._time_period_representation import (
     TimePeriodRepresentation,
     format_time_period_external_representation,
 )
 from vtlengine.Interpreter import InterpreterAnalyzer
+from vtlengine.Model import Dataset

 pd.options.mode.chained_assignment = None

@@ -68,6 +80,38 @@ def _parser(stream: CommonTokenStream) -> Any:
     return vtl_parser.start()


+def _extract_input_datasets(script: Union[str, TransformationScheme, Path]) -> str:
+    if isinstance(script, TransformationScheme):
+        vtl_script = _check_script(script)
+    elif isinstance(script, (str, Path)):
+        vtl_script = load_vtl(script)
+    else:
+        raise TypeError("Unsupported script type.")
+
+    ast = create_ast(vtl_script)
+    dag_inputs = DAGAnalyzer.ds_structure(ast)["global_inputs"]
+
+    return dag_inputs
+
+
+def prettify(script: Union[str, TransformationScheme, Path]) -> str:
+    """
+    Function that prettifies the VTL script given.
+
+    Args:
+        script: VTL script as a string, a Transformation Scheme object or Path with the VTL script.
+
+    Returns:
+        A str with the prettified VTL script.
+    """
+    from vtlengine.AST.ASTComment import create_ast_with_comments
+
+    checking = _check_script(script)
+    vtl = load_vtl(checking)
+    ast = create_ast_with_comments(vtl)
+    return ASTString(pretty=True).render(ast)
+
+
 def create_ast(text: str) -> Start:
     """
     Function that creates the AST object.
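Review note: the new `prettify` entry point round-trips a script through the comment-aware parser and the `ASTString` renderer. A minimal usage sketch; the one-line script is illustrative, not taken from this release:

    from vtlengine.API import prettify

    # Hypothetical script; any valid VTL works here.
    script = "DS_r <- DS_1 + DS_2; /* persistent result */"
    print(prettify(script))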
@@ -90,30 +134,19 @@ def create_ast(text: str) -> Start:


 def semantic_analysis(
-    script: Union[str, Path],
-    data_structures: Union[Dict[str, Any], Path, List[
+    script: Union[str, TransformationScheme, Path],
+    data_structures: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
     value_domains: Optional[Union[Dict[str, Any], Path]] = None,
     external_routines: Optional[Union[Dict[str, Any], Path]] = None,
-) ->
+) -> Dict[str, Dataset]:
     """
-    Checks if the vtl
-
+    Checks if the vtl scripts and its related datastructures are valid. As part of the compatibility
+    with pysdmx library, the vtl script can be a Transformation Scheme object, which availability as
+    input is going to be serialized as a string VTL script.

-
-    that contains the vtl file.
-
-    Moreover, the data structure can be a dictionary or a filepath to the folder that contains it.
-
-    If there are any value domains or external routines, this data is taken into account.
-    Both can be loaded the same way as data structures or vtl scripts are.
-
-    Finally, the :obj:`Interpreter <vtl-engine-spark.Interpreter.InterpreterAnalyzer>`
-    class takes all of this information and checks it with the ast generated to
-    return the semantic analysis result.
-
-    Concepts you may know:
+    Concepts you may need to know:

-    - Vtl script: The
+    - Vtl script: The script that shows the set of operations to be executed.

     - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
     (and/or scalar) about the datatype (String, integer or number), \
@@ -126,7 +159,8 @@ def semantic_analysis(
     This function has the following params:

     Args:
-        script:
+        script: Vtl script as a string, Transformation Scheme object or Path to the folder \
+            that holds the vtl script.
         data_structures: Dict or Path (file or folder), \
            or List of Dicts or Paths with the data structures JSON files.
         value_domains: Dict or Path of the value domains JSON files. (default: None)
@@ -139,8 +173,10 @@ def semantic_analysis(
         Exception: If the files have the wrong format, or they do not exist, \
             or their Paths are invalid.
     """
+
     # AST generation
-
+    checking = _check_script(script)
+    vtl = load_vtl(checking)
     ast = create_ast(vtl)

     # Loading datasets
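Review note: `semantic_analysis` now accepts a `TransformationScheme` in addition to a string or Path, and normalizes it via `_check_script` before loading the VTL. A minimal sketch; the script is illustrative, and the data-structure dict follows the layout described in the docstring (name, datatype, role, nullability) but its exact field names should be checked against the package documentation:

    from vtlengine.API import semantic_analysis

    script = "DS_r <- DS_1 * 2;"  # hypothetical script
    data_structures = {  # hypothetical structure for DS_1
        "datasets": [
            {
                "name": "DS_1",
                "DataStructure": [
                    {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                    {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
                ],
            }
        ]
    }
    result = semantic_analysis(script=script, data_structures=data_structures)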
@@ -161,36 +197,39 @@ def semantic_analysis(
         external_routines=ext_routines,
         only_semantic=True,
     )
-
-    result = interpreter.visit(ast)
+    result = interpreter.visit(ast)
     return result


 def run(
-    script: Union[str, Path],
-    data_structures: Union[Dict[str, Any], Path, List[
-    datapoints: Union[Dict[str,
+    script: Union[str, TransformationScheme, Path],
+    data_structures: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
+    datapoints: Union[Dict[str, pd.DataFrame], str, Path, List[Dict[str, Any]], List[Path]],
     value_domains: Optional[Union[Dict[str, Any], Path]] = None,
     external_routines: Optional[Union[str, Path]] = None,
     time_period_output_format: str = "vtl",
-    return_only_persistent: bool =
+    return_only_persistent: bool = True,
     output_folder: Optional[Union[str, Path]] = None,
-) ->
+) -> Dict[str, Dataset]:
     """
-    Run is the main function of the ``API``, which mission is to
-
-
-
-    At the same time, data structures are loaded with its datapoints.
+    Run is the main function of the ``API``, which mission is to execute
+    the vtl operation over the data.
+
+    Concepts you may need to know:

-
-    and establish the datatype (string, integer or number),
-    and the role that each component is going to have (Identifier, Attribute or Measure).
-    It can be a dictionary or a path to the JSON file or folder that contains it.
+    - Vtl script: The script that shows the set of operations to be executed.

-
-
-
+    - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
+    (and/or scalar) about the datatype (String, integer or number), \
+    the role (Identifier, Attribute or Measure) and the nullability each component has.
+
+    - Data point: `Pandas Dataframe \
+    <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ \
+    that holds the data related to the Dataset.
+
+    - Value domains: Collection of unique values on the same datatype.
+
+    - External routines: SQL query used to transform a dataset.

     .. important::
         The data structure and the data points must have the same dataset
@@ -213,35 +252,12 @@ def run(
         For more details, see
         `s3fs documentation <https://s3fs.readthedocs.io/en/latest/index.html#credentials>`_.

-    Before the execution, the DAG analysis reviews if the VTL script is a direct acyclic
-
-
-    If value domain data or external routines are required, the function loads this information
-    and integrates them into the
-    :obj:`Interpreter <vtl-engine-spark.Interpreter.InterpreterAnalyzer>` class.
-
-    Moreover, if any component has a Time Period component, the external representation
-    is passed to the Interpreter class.
-
-    Concepts you may need to know:
-
-    - Vtl script: The expression that shows the operation to be done.
-
-    - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
-    (and/or scalar) about the datatype (String, integer or number), \
-    the role (Identifier, Attribute or Measure) and the nullability each component has.
-
-    - Data point: Pointer to the data. It will be loaded as a `Pandas Dataframe \
-    <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
-
-    - Value domains: Collection of unique values that have the same datatype.
-
-    - External routines: SQL query used to transform a dataset.
+    Before the execution, the DAG analysis reviews if the VTL script is a direct acyclic graph.

     This function has the following params:

     Args:
-        script:
+        script: VTL script as a string, a Transformation Scheme object or Path with the VTL script.

         data_structures: Dict, Path or a List of Dicts or Paths with the data structures.

@@ -256,7 +272,7 @@ def run(
             Time Period components.

         return_only_persistent: If True, run function will only return the results of \
-            Persistent Assignments. (default:
+            Persistent Assignments. (default: True)

         output_folder: Path or S3 URI to the output folder. (default: None)

@@ -269,7 +285,9 @@ def run(
             or their Paths are invalid.

     """
+
     # AST generation
+    script = _check_script(script)
     vtl = load_vtl(script)
     ast = create_ast(vtl)

@@ -303,9 +321,9 @@ def run(
         datapoints_paths=path_dict,
         output_path=output_folder,
         time_period_representation=time_period_representation,
+        return_only_persistent=return_only_persistent,
     )
-
-    result = interpreter.visit(ast)
+    result = interpreter.visit(ast)

     # Applying time period output format
     if output_folder is None:
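Review note: `return_only_persistent` now defaults to True and is also forwarded to the interpreter. A usage sketch, reusing the hypothetical `script` and `data_structures` from the note above:

    import pandas as pd

    from vtlengine.API import run

    datapoints = {"DS_1": pd.DataFrame({"Id_1": [1, 2], "Me_1": [10.0, 20.0]})}
    results = run(script=script, data_structures=data_structures, datapoints=datapoints)
    # With return_only_persistent=True (the new default), only the persistent
    # assignment DS_r is returned.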
@@ -316,3 +334,174 @@ def run(
     if return_only_persistent:
         return _return_only_persistent_datasets(result, ast)
     return result
+
+
+def run_sdmx(  # noqa: C901
+    script: Union[str, TransformationScheme, Path],
+    datasets: Sequence[PandasDataset],
+    mappings: Optional[Union[VtlDataflowMapping, Dict[str, str]]] = None,
+    value_domains: Optional[Union[Dict[str, Any], Path]] = None,
+    external_routines: Optional[Union[str, Path]] = None,
+    time_period_output_format: str = "vtl",
+    return_only_persistent: bool = True,
+    output_folder: Optional[Union[str, Path]] = None,
+) -> Dict[str, Dataset]:
+    """
+    Executes a VTL script using a list of pysdmx `PandasDataset` objects.
+
+    This function prepares the required VTL data structures and datapoints from
+    the given list of pysdmx `PandasDataset` objects. It validates each
+    `PandasDataset` uses a valid `Schema` instance as its structure. Each `Schema` is converted
+    to the appropriate VTL JSON data structure, and the Pandas Dataframe is extracted.
+
+    .. important::
+        We recommend to use this function in combination with the
+        `get_datasets <https://py.sdmx.io/howto/data_rw.html#pysdmx.io.get_datasets>`_
+        pysdmx method.
+
+    .. important::
+        The mapping between pysdmx `PandasDataset
+        <https://py.sdmx.io/howto/data_rw.html#pysdmx.io.pd.PandasDataset>`_ \
+        and VTL datasets is done using the `Schema` instance of the `PandasDataset`.
+        The Schema ID is used as the dataset name.
+
+        DataStructure=MD:TEST_DS(1.0) -> TEST_DS
+
+    The function then calls the :obj:`run <vtlengine.API>` function with the provided VTL
+    script and prepared inputs.
+
+    Before the execution, the DAG analysis reviews if the generated VTL script is a direct acyclic
+    graph.
+
+    Args:
+        script: VTL script as a string, a Transformation Scheme object or Path with the VTL script.
+
+        datasets: A list of PandasDataset.
+
+        mappings: A dictionary or VtlDataflowMapping object that maps the dataset names.
+
+        value_domains: Dict or Path of the value domains JSON files. (default:None)
+
+        external_routines: String or Path of the external routines SQL files. (default: None)
+
+        time_period_output_format: String with the possible values \
+            ("sdmx_gregorian", "sdmx_reporting", "vtl") for the representation of the \
+            Time Period components.
+
+        return_only_persistent: If True, run function will only return the results of \
+            Persistent Assignments. (default: True)
+
+        output_folder: Path or S3 URI to the output folder. (default: None)
+
+    Returns:
+        The datasets are produced without data if the output folder is defined.
+
+    Raises:
+        SemanticError: If any dataset does not contain a valid `Schema` instance as its structure.
+
+    """
+    mapping_dict = {}
+    input_names = _extract_input_datasets(script)
+
+    # Mapping handling
+
+    if mappings is None:
+        if len(datasets) != 1:
+            raise SemanticError("0-1-3-3")
+        if len(datasets) == 1:
+            if len(input_names) != 1:
+                raise SemanticError("0-1-3-1", number_datasets=len(input_names))
+            schema = datasets[0].structure
+            if not isinstance(schema, Schema):
+                raise SemanticError("0-1-3-2", schema=schema)
+            mapping_dict = {schema.short_urn: input_names[0]}
+    elif isinstance(mappings, Dict):
+        mapping_dict = mappings
+    elif isinstance(mappings, VtlDataflowMapping):
+        if mappings.to_vtl_mapping_method is not None:
+            warnings.warn(
+                "To_vtl_mapping_method is not implemented yet, we will use the Basic "
+                "method with old data."
+            )
+        if mappings.from_vtl_mapping_method is not None:
+            warnings.warn(
+                "From_vtl_mapping_method is not implemented yet, we will use the Basic "
+                "method with old data."
+            )
+        if isinstance(mappings.dataflow, str):
+            short_urn = str(parse_urn(mappings.dataflow))
+        elif isinstance(mappings.dataflow, (Reference, DataflowRef)):
+            short_urn = str(mappings.dataflow)
+        elif isinstance(mappings.dataflow, Dataflow):
+            short_urn = mappings.dataflow.short_urn
+        else:
+            raise TypeError(
+                "Expected str, Reference, DataflowRef or Dataflow type for dataflow in "
+                "VtlDataflowMapping."
+            )
+
+        mapping_dict = {short_urn: mappings.dataflow_alias}
+    else:
+        raise TypeError("Expected dict or VtlDataflowMapping type for mappings.")
+
+    for vtl_name in mapping_dict.values():
+        if vtl_name not in input_names:
+            raise SemanticError("0-1-3-5", dataset_name=vtl_name)
+
+    datapoints = {}
+    data_structures = []
+    for dataset in datasets:
+        schema = dataset.structure
+        if not isinstance(schema, Schema):
+            raise SemanticError("0-1-3-2", schema=schema)
+        if schema.short_urn not in mapping_dict:
+            raise SemanticError("0-1-3-4", short_urn=schema.short_urn)
+        # Generating VTL Datastructure and Datapoints.
+        dataset_name = mapping_dict[schema.short_urn]
+        vtl_structure = to_vtl_json(schema, dataset_name)
+        data_structures.append(vtl_structure)
+        datapoints[dataset_name] = dataset.data
+
+    missing = []
+    for input_name in input_names:
+        if input_name not in mapping_dict.values():
+            missing.append(input_name)
+    if missing:
+        raise SemanticError("0-1-3-6", missing=missing)
+
+    result = run(
+        script=script,
+        data_structures=data_structures,
+        datapoints=datapoints,
+        value_domains=value_domains,
+        external_routines=external_routines,
+        time_period_output_format=time_period_output_format,
+        return_only_persistent=return_only_persistent,
+        output_folder=output_folder,
+    )
+    return result
+
+
+def generate_sdmx(
+    script: Union[str, Path], agency_id: str, id: str, version: str = "1.0"
+) -> TransformationScheme:
+    """
+    Function that generates a TransformationScheme object from a VTL script.
+
+    The TransformationScheme object is the SDMX representation of the VTL script. \
+    For more details please check the `SDMX IM VTL objects \
+    <https://sdmx.org/wp-content/uploads/SDMX_3-0-0_SECTION_2_FINAL-1_0.pdf#page=146>`_, line 2266.
+
+    Args:
+        script: A string with the VTL script.
+        agency_id: The Agency ID used in the generated `TransformationScheme` object.
+        id: The given id of the generated `TransformationScheme` object.
+        version: The Version used in the generated `TransformationScheme` object. (default: "1.0")
+
+    Returns:
+        The generated Transformation Scheme object.
+    """
+    vtl = load_vtl(script)
+    ast = create_ast(vtl)
+    result = ast_to_sdmx(ast, agency_id, id, version)
+    return result
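Review note: a sketch of the two new SDMX entry points. The file names are hypothetical; `get_datasets` is the pysdmx reader recommended in the docstring above, and the script assumes a single input dataset named TEST_DS so the mapping can be inferred:

    from pysdmx.io import get_datasets

    from vtlengine.API import generate_sdmx, run_sdmx

    # Hypothetical SDMX data and structure files.
    datasets = get_datasets("data.xml", "structure.xml")
    results = run_sdmx("DS_r <- TEST_DS * 2;", datasets)

    # Wrap the same script as an SDMX TransformationScheme.
    scheme = generate_sdmx("DS_r <- TEST_DS * 2;", agency_id="MD", id="TS1", version="1.0")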
vtlengine/AST/ASTComment.py
ADDED
@@ -0,0 +1,56 @@
+from antlr4 import CommonTokenStream, InputStream
+from antlr4.Token import CommonToken
+
+from vtlengine.API import create_ast
+from vtlengine.AST import Comment, Start
+from vtlengine.AST.ASTConstructorModules import extract_token_info
+from vtlengine.AST.Grammar.lexer import Lexer
+
+
+def generate_ast_comment(token: CommonToken) -> Comment:
+    """
+    Parses a token belonging to a comment and returns a Comment AST object.
+
+    Args:
+        token (str): The comment string to parse.
+
+    Returns:
+        Comment: A Comment AST object.
+    """
+    token_info = extract_token_info(token)
+    text = token.text
+    if token.type == Lexer.SL_COMMENT:
+        text = token.text[:-1]  # Remove the trailing newline character
+    return Comment(value=text, **token_info)
+
+
+def create_ast_with_comments(text: str) -> Start:
+    """
+    Parses a VTL script and returns an AST with comments.
+
+    Args:
+        text (str): The VTL script to parse.
+
+    Returns:
+        AST: The generated AST with comments.
+    """
+    # Call the create_ast function to generate the AST from channel 0
+    ast = create_ast(text)
+
+    # Reading the script on channel 2 to get the comments
+    lexer_ = Lexer(InputStream(text))
+    stream = CommonTokenStream(lexer_, channel=2)
+
+    # Fill the stream with tokens on the buffer
+    stream.fill()
+
+    # Extract comments from the stream
+    comments = [generate_ast_comment(token) for token in stream.tokens if token.channel == 2]
+
+    # Add comments to the AST
+    ast.children.extend(comments)
+
+    # Sort the ast children based on their start line and column
+    ast.children.sort(key=lambda x: (x.line_start, x.column_start))
+
+    return ast
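Review note: comments live on lexer channel 2, so `create_ast_with_comments` parses the script twice: once for the statement AST and once to collect the comment tokens, which are then merged and ordered by source position. A minimal sketch with an illustrative script:

    from vtlengine.AST.ASTComment import create_ast_with_comments

    script = "/* revenue */\nDS_r <- DS_1 + DS_2;"  # hypothetical script
    ast = create_ast_with_comments(script)
    # Comment nodes are interleaved with the statements, sorted by
    # (line_start, column_start), which is what prettify relies on.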