vtlengine 1.1rc2-py3-none-any.whl → 1.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +231 -6
- vtlengine/API/__init__.py +256 -65
- vtlengine/AST/ASTComment.py +56 -0
- vtlengine/AST/ASTConstructor.py +71 -18
- vtlengine/AST/ASTConstructorModules/Expr.py +191 -72
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +81 -38
- vtlengine/AST/ASTConstructorModules/Terminals.py +76 -31
- vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- vtlengine/AST/ASTEncoders.py +4 -0
- vtlengine/AST/ASTString.py +622 -0
- vtlengine/AST/ASTTemplate.py +28 -2
- vtlengine/AST/DAG/__init__.py +10 -1
- vtlengine/AST/__init__.py +127 -14
- vtlengine/Exceptions/messages.py +9 -0
- vtlengine/Interpreter/__init__.py +53 -8
- vtlengine/Model/__init__.py +9 -4
- vtlengine/Operators/Aggregation.py +7 -5
- vtlengine/Operators/Analytic.py +16 -11
- vtlengine/Operators/Conditional.py +20 -5
- vtlengine/Operators/Time.py +11 -10
- vtlengine/Utils/__init__.py +49 -0
- vtlengine/__init__.py +4 -2
- vtlengine/files/parser/__init__.py +16 -26
- vtlengine/files/parser/_rfc_dialect.py +1 -1
- vtlengine/py.typed +0 -0
- vtlengine-1.1.1.dist-info/METADATA +92 -0
- {vtlengine-1.1rc2.dist-info → vtlengine-1.1.1.dist-info}/RECORD +29 -26
- {vtlengine-1.1rc2.dist-info → vtlengine-1.1.1.dist-info}/WHEEL +1 -1
- vtlengine-1.1rc2.dist-info/METADATA +0 -248
- {vtlengine-1.1rc2.dist-info → vtlengine-1.1.1.dist-info}/LICENSE.md +0 -0
vtlengine/API/__init__.py
CHANGED
```diff
@@ -1,29 +1,41 @@
+import warnings
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Sequence, Union
 
 import pandas as pd
 from antlr4 import CommonTokenStream, InputStream  # type: ignore[import-untyped]
 from antlr4.error.ErrorListener import ErrorListener  # type: ignore[import-untyped]
+from pysdmx.io.pd import PandasDataset
+from pysdmx.model import DataflowRef, Reference, TransformationScheme
+from pysdmx.model.dataflow import Dataflow, Schema
+from pysdmx.model.vtl import VtlDataflowMapping
+from pysdmx.util import parse_urn
 
 from vtlengine.API._InternalApi import (
     _check_output_folder,
+    _check_script,
     _return_only_persistent_datasets,
+    ast_to_sdmx,
     load_datasets,
     load_datasets_with_data,
     load_external_routines,
     load_value_domains,
     load_vtl,
+    to_vtl_json,
 )
 from vtlengine.AST import Start
 from vtlengine.AST.ASTConstructor import ASTVisitor
+from vtlengine.AST.ASTString import ASTString
 from vtlengine.AST.DAG import DAGAnalyzer
 from vtlengine.AST.Grammar.lexer import Lexer
 from vtlengine.AST.Grammar.parser import Parser
+from vtlengine.Exceptions import SemanticError
 from vtlengine.files.output._time_period_representation import (
     TimePeriodRepresentation,
     format_time_period_external_representation,
 )
 from vtlengine.Interpreter import InterpreterAnalyzer
+from vtlengine.Model import Dataset
 
 pd.options.mode.chained_assignment = None
 
@@ -68,6 +80,38 @@ def _parser(stream: CommonTokenStream) -> Any:
     return vtl_parser.start()
 
 
+def _extract_input_datasets(script: Union[str, TransformationScheme, Path]) -> str:
+    if isinstance(script, TransformationScheme):
+        vtl_script = _check_script(script)
+    elif isinstance(script, (str, Path)):
+        vtl_script = load_vtl(script)
+    else:
+        raise TypeError("Unsupported script type.")
+
+    ast = create_ast(vtl_script)
+    dag_inputs = DAGAnalyzer.ds_structure(ast)["global_inputs"]
+
+    return dag_inputs
+
+
+def prettify(script: Union[str, TransformationScheme, Path]) -> str:
+    """
+    Function that prettifies the VTL script given.
+
+    Args:
+        script: VTL script as a string, a Transformation Scheme object or Path with the VTL script.
+
+    Returns:
+        A str with the prettified VTL script.
+    """
+    from vtlengine.AST.ASTComment import create_ast_with_comments
+
+    checking = _check_script(script)
+    vtl = load_vtl(checking)
+    ast = create_ast_with_comments(vtl)
+    return ASTString(pretty=True).render(ast)
+
+
 def create_ast(text: str) -> Start:
     """
     Function that creates the AST object.
```
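The new `prettify` entry point round-trips a script through the comment-preserving parser (`create_ast_with_comments`, added in the new ASTComment module further down) and the `ASTString` renderer. A minimal usage sketch, assuming vtlengine 1.1.1 is installed; the one-line script is illustrative:

```python
from vtlengine.API import prettify

# Any valid VTL script works; this one-liner is illustrative.
script = "DS_r <- DS_1 + DS_2; // persistent result"
print(prettify(script))
```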
```diff
@@ -90,30 +134,19 @@ def create_ast(text: str) -> Start:
 
 
 def semantic_analysis(
-    script: Union[str, Path],
-    data_structures: Union[Dict[str, Any], Path, List[
+    script: Union[str, TransformationScheme, Path],
+    data_structures: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
     value_domains: Optional[Union[Dict[str, Any], Path]] = None,
     external_routines: Optional[Union[Dict[str, Any], Path]] = None,
-) ->
+) -> Dict[str, Dataset]:
     """
-    Checks if the vtl
-
+    Checks if the vtl scripts and its related datastructures are valid. As part of the compatibility
+    with pysdmx library, the vtl script can be a Transformation Scheme object, which availability as
+    input is going to be serialized as a string VTL script.
 
-
-    that contains the vtl file.
-
-    Moreover, the data structure can be a dictionary or a filepath to the folder that contains it.
-
-    If there are any value domains or external routines, this data is taken into account.
-    Both can be loaded the same way as data structures or vtl scripts are.
-
-    Finally, the :obj:`Interpreter <vtl-engine-spark.Interpreter.InterpreterAnalyzer>`
-    class takes all of this information and checks it with the ast generated to
-    return the semantic analysis result.
-
-    Concepts you may know:
+    Concepts you may need to know:
 
-    - Vtl script: The
+    - Vtl script: The script that shows the set of operations to be executed.
 
     - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
     (and/or scalar) about the datatype (String, integer or number), \
@@ -126,7 +159,8 @@ def semantic_analysis(
     This function has the following params:
 
     Args:
-        script:
+        script: Vtl script as a string, Transformation Scheme object or Path to the folder \
+        that holds the vtl script.
         data_structures: Dict or Path (file or folder), \
         or List of Dicts or Paths with the data structures JSON files.
         value_domains: Dict or Path of the value domains JSON files. (default: None)
@@ -139,8 +173,10 @@ def semantic_analysis(
         Exception: If the files have the wrong format, or they do not exist, \
         or their Paths are invalid.
     """
+
     # AST generation
-
+    checking = _check_script(script)
+    vtl = load_vtl(checking)
     ast = create_ast(vtl)
 
     # Loading datasets
```
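`semantic_analysis` now also accepts a pysdmx `TransformationScheme` and is annotated as returning `Dict[str, Dataset]`. A minimal sketch of a call; the data-structure JSON below follows the layout the package documents elsewhere, so treat its exact keys as an assumption:

```python
from vtlengine.API import semantic_analysis

# Assumed data-structure JSON layout (keys as documented by the package).
data_structures = {
    "datasets": [
        {
            "name": "DS_1",
            "DataStructure": [
                {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
            ],
        }
    ]
}

# Returns the computed output structures (no data), keyed by dataset name.
result = semantic_analysis(script="DS_r <- DS_1;", data_structures=data_structures)
```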
```diff
@@ -166,30 +202,34 @@ def semantic_analysis(
 
 
 def run(
-    script: Union[str, Path],
-    data_structures: Union[Dict[str, Any], Path, List[
-    datapoints: Union[Dict[str,
+    script: Union[str, TransformationScheme, Path],
+    data_structures: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
+    datapoints: Union[Dict[str, pd.DataFrame], str, Path, List[Dict[str, Any]], List[Path]],
     value_domains: Optional[Union[Dict[str, Any], Path]] = None,
     external_routines: Optional[Union[str, Path]] = None,
     time_period_output_format: str = "vtl",
-    return_only_persistent: bool =
+    return_only_persistent: bool = True,
     output_folder: Optional[Union[str, Path]] = None,
-) ->
+) -> Dict[str, Dataset]:
     """
-    Run is the main function of the ``API``, which mission is to
-
-
-
-    At the same time, data structures are loaded with its datapoints.
+    Run is the main function of the ``API``, which mission is to execute
+    the vtl operation over the data.
+
+    Concepts you may need to know:
 
-
-    and establish the datatype (string, integer or number),
-    and the role that each component is going to have (Identifier, Attribute or Measure).
-    It can be a dictionary or a path to the JSON file or folder that contains it.
+    - Vtl script: The script that shows the set of operations to be executed.
 
-
-
-
+    - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
+    (and/or scalar) about the datatype (String, integer or number), \
+    the role (Identifier, Attribute or Measure) and the nullability each component has.
+
+    - Data point: `Pandas Dataframe \
+    <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ \
+    that holds the data related to the Dataset.
+
+    - Value domains: Collection of unique values on the same datatype.
+
+    - External routines: SQL query used to transform a dataset.
 
     .. important::
         The data structure and the data points must have the same dataset
@@ -212,35 +252,12 @@ def run(
         For more details, see
        `s3fs documentation <https://s3fs.readthedocs.io/en/latest/index.html#credentials>`_.
 
-    Before the execution, the DAG analysis reviews if the VTL script is a direct acyclic
-
-
-    If value domain data or external routines are required, the function loads this information
-    and integrates them into the
-    :obj:`Interpreter <vtl-engine-spark.Interpreter.InterpreterAnalyzer>` class.
-
-    Moreover, if any component has a Time Period component, the external representation
-    is passed to the Interpreter class.
-
-    Concepts you may need to know:
-
-    - Vtl script: The expression that shows the operation to be done.
-
-    - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
-    (and/or scalar) about the datatype (String, integer or number), \
-    the role (Identifier, Attribute or Measure) and the nullability each component has.
-
-    - Data point: Pointer to the data. It will be loaded as a `Pandas Dataframe \
-    <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
-
-    - Value domains: Collection of unique values that have the same datatype.
-
-    - External routines: SQL query used to transform a dataset.
+    Before the execution, the DAG analysis reviews if the VTL script is a direct acyclic graph.
 
     This function has the following params:
 
     Args:
-        script:
+        script: VTL script as a string, a Transformation Scheme object or Path with the VTL script.
 
         data_structures: Dict, Path or a List of Dicts or Paths with the data structures.
 
@@ -255,7 +272,7 @@ def run(
         Time Period components.
 
         return_only_persistent: If True, run function will only return the results of \
-        Persistent Assignments. (default:
+        Persistent Assignments. (default: True)
 
         output_folder: Path or S3 URI to the output folder. (default: None)
 
@@ -268,7 +285,9 @@ def run(
        or their Paths are invalid.
 
    """
+
    # AST generation
+    script = _check_script(script)
    vtl = load_vtl(script)
    ast = create_ast(vtl)
 
@@ -302,6 +321,7 @@ def run(
        datapoints_paths=path_dict,
        output_path=output_folder,
        time_period_representation=time_period_representation,
+        return_only_persistent=return_only_persistent,
    )
    result = interpreter.visit(ast)
 
```
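`run` keeps its shape but now defaults `return_only_persistent` to True and forwards the flag to the interpreter. A minimal sketch with in-memory datapoints, reusing the assumed data-structure layout from the `semantic_analysis` sketch above:

```python
import pandas as pd

from vtlengine.API import run

data_structures = {
    "datasets": [
        {
            "name": "DS_1",
            "DataStructure": [
                {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
            ],
        }
    ]
}
datapoints = {"DS_1": pd.DataFrame({"Id_1": [1, 2], "Me_1": [10.0, 20.0]})}

# "<-" is a persistent assignment, so DS_r is returned under the new default.
result = run(script="DS_r <- DS_1;", data_structures=data_structures, datapoints=datapoints)
print(result["DS_r"].data)
```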
```diff
@@ -314,3 +334,174 @@ def run(
     if return_only_persistent:
         return _return_only_persistent_datasets(result, ast)
     return result
+
+
+def run_sdmx(  # noqa: C901
+    script: Union[str, TransformationScheme, Path],
+    datasets: Sequence[PandasDataset],
+    mappings: Optional[Union[VtlDataflowMapping, Dict[str, str]]] = None,
+    value_domains: Optional[Union[Dict[str, Any], Path]] = None,
+    external_routines: Optional[Union[str, Path]] = None,
+    time_period_output_format: str = "vtl",
+    return_only_persistent: bool = True,
+    output_folder: Optional[Union[str, Path]] = None,
+) -> Dict[str, Dataset]:
+    """
+    Executes a VTL script using a list of pysdmx `PandasDataset` objects.
+
+    This function prepares the required VTL data structures and datapoints from
+    the given list of pysdmx `PandasDataset` objects. It validates each
+    `PandasDataset` uses a valid `Schema` instance as its structure. Each `Schema` is converted
+    to the appropriate VTL JSON data structure, and the Pandas Dataframe is extracted.
+
+    .. important::
+        We recommend to use this function in combination with the
+        `get_datasets <https://py.sdmx.io/howto/data_rw.html#pysdmx.io.get_datasets>`_
+        pysdmx method.
+
+    .. important::
+        The mapping between pysdmx `PandasDataset
+        <https://py.sdmx.io/howto/data_rw.html#pysdmx.io.pd.PandasDataset>`_ \
+        and VTL datasets is done using the `Schema` instance of the `PandasDataset`.
+        The Schema ID is used as the dataset name.
+
+        DataStructure=MD:TEST_DS(1.0) -> TEST_DS
+
+    The function then calls the :obj:`run <vtlengine.API>` function with the provided VTL
+    script and prepared inputs.
+
+    Before the execution, the DAG analysis reviews if the generated VTL script is a direct acyclic
+    graph.
+
+    Args:
+        script: VTL script as a string, a Transformation Scheme object or Path with the VTL script.
+
+        datasets: A list of PandasDataset.
+
+        mappings: A dictionary or VtlDataflowMapping object that maps the dataset names.
+
+        value_domains: Dict or Path of the value domains JSON files. (default: None)
+
+        external_routines: String or Path of the external routines SQL files. (default: None)
+
+        time_period_output_format: String with the possible values \
+        ("sdmx_gregorian", "sdmx_reporting", "vtl") for the representation of the \
+        Time Period components.
+
+        return_only_persistent: If True, run function will only return the results of \
+        Persistent Assignments. (default: True)
+
+        output_folder: Path or S3 URI to the output folder. (default: None)
+
+    Returns:
+        The datasets are produced without data if the output folder is defined.
+
+    Raises:
+        SemanticError: If any dataset does not contain a valid `Schema` instance as its structure.
+
+    """
+    mapping_dict = {}
+    input_names = _extract_input_datasets(script)
+
+    # Mapping handling
+
+    if mappings is None:
+        if len(datasets) != 1:
+            raise SemanticError("0-1-3-3")
+        if len(datasets) == 1:
+            if len(input_names) != 1:
+                raise SemanticError("0-1-3-1", number_datasets=len(input_names))
+            schema = datasets[0].structure
+            if not isinstance(schema, Schema):
+                raise SemanticError("0-1-3-2", schema=schema)
+            mapping_dict = {schema.short_urn: input_names[0]}
+    elif isinstance(mappings, Dict):
+        mapping_dict = mappings
+    elif isinstance(mappings, VtlDataflowMapping):
+        if mappings.to_vtl_mapping_method is not None:
+            warnings.warn(
+                "To_vtl_mapping_method is not implemented yet, we will use the Basic "
+                "method with old data."
+            )
+        if mappings.from_vtl_mapping_method is not None:
+            warnings.warn(
+                "From_vtl_mapping_method is not implemented yet, we will use the Basic "
+                "method with old data."
+            )
+        if isinstance(mappings.dataflow, str):
+            short_urn = str(parse_urn(mappings.dataflow))
+        elif isinstance(mappings.dataflow, (Reference, DataflowRef)):
+            short_urn = str(mappings.dataflow)
+        elif isinstance(mappings.dataflow, Dataflow):
+            short_urn = mappings.dataflow.short_urn
+        else:
+            raise TypeError(
+                "Expected str, Reference, DataflowRef or Dataflow type for dataflow in "
+                "VtlDataflowMapping."
+            )
+
+        mapping_dict = {short_urn: mappings.dataflow_alias}
+    else:
+        raise TypeError("Expected dict or VtlDataflowMapping type for mappings.")
+
+    for vtl_name in mapping_dict.values():
+        if vtl_name not in input_names:
+            raise SemanticError("0-1-3-5", dataset_name=vtl_name)
+
+    datapoints = {}
+    data_structures = []
+    for dataset in datasets:
+        schema = dataset.structure
+        if not isinstance(schema, Schema):
+            raise SemanticError("0-1-3-2", schema=schema)
+        if schema.short_urn not in mapping_dict:
+            raise SemanticError("0-1-3-4", short_urn=schema.short_urn)
+        # Generating VTL Datastructure and Datapoints.
+        dataset_name = mapping_dict[schema.short_urn]
+        vtl_structure = to_vtl_json(schema, dataset_name)
+        data_structures.append(vtl_structure)
+        datapoints[dataset_name] = dataset.data
+
+    missing = []
+    for input_name in input_names:
+        if input_name not in mapping_dict.values():
+            missing.append(input_name)
+    if missing:
+        raise SemanticError("0-1-3-6", missing=missing)
+
+    result = run(
+        script=script,
+        data_structures=data_structures,
+        datapoints=datapoints,
+        value_domains=value_domains,
+        external_routines=external_routines,
+        time_period_output_format=time_period_output_format,
+        return_only_persistent=return_only_persistent,
+        output_folder=output_folder,
+    )
+    return result
+
+
+def generate_sdmx(
+    script: Union[str, Path], agency_id: str, id: str, version: str = "1.0"
+) -> TransformationScheme:
+    """
+    Function that generates a TransformationScheme object from a VTL script.
+
+    The TransformationScheme object is the SDMX representation of the VTL script. \
+    For more details please check the `SDMX IM VTL objects \
+    <https://sdmx.org/wp-content/uploads/SDMX_3-0-0_SECTION_2_FINAL-1_0.pdf#page=146>`_, line 2266.
+
+    Args:
+        script: A string with the VTL script.
+        agency_id: The Agency ID used in the generated `TransformationScheme` object.
+        id: The given id of the generated `TransformationScheme` object.
+        version: The Version used in the generated `TransformationScheme` object. (default: "1.0")
+
+    Returns:
+        The generated Transformation Scheme object.
+    """
+    vtl = load_vtl(script)
+    ast = create_ast(vtl)
+    result = ast_to_sdmx(ast, agency_id, id, version)
+    return result
```
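Two sketches for the new SDMX entry points. First `run_sdmx`, combined with pysdmx's `get_datasets` as the docstring recommends; the file names are hypothetical and the exact `get_datasets` signature should be checked against the installed pysdmx version:

```python
from pysdmx.io import get_datasets

from vtlengine.API import run_sdmx

# Hypothetical SDMX files; each returned PandasDataset carries a Schema whose
# short URN (e.g. "DataStructure=MD:TEST_DS(1.0)") maps to the VTL name TEST_DS.
datasets = get_datasets("data.xml", "structure.xml")
result = run_sdmx("DS_r <- TEST_DS;", datasets)
```

And `generate_sdmx`, which wraps `ast_to_sdmx` to produce the SDMX representation of a script; the agency, id and version are illustrative:

```python
from vtlengine.API import generate_sdmx

scheme = generate_sdmx(script="DS_r <- DS_1;", agency_id="MD", id="TS1", version="1.0")
# The resulting TransformationScheme can be passed back to run() or run_sdmx().
```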
vtlengine/AST/ASTComment.py
ADDED

```diff
@@ -0,0 +1,56 @@
+from antlr4 import CommonTokenStream, InputStream
+from antlr4.Token import CommonToken
+
+from vtlengine.API import create_ast
+from vtlengine.AST import Comment, Start
+from vtlengine.AST.ASTConstructorModules import extract_token_info
+from vtlengine.AST.Grammar.lexer import Lexer
+
+
+def generate_ast_comment(token: CommonToken) -> Comment:
+    """
+    Parses a token belonging to a comment and returns a Comment AST object.
+
+    Args:
+        token (str): The comment string to parse.
+
+    Returns:
+        Comment: A Comment AST object.
+    """
+    token_info = extract_token_info(token)
+    text = token.text
+    if token.type == Lexer.SL_COMMENT:
+        text = token.text[:-1]  # Remove the trailing newline character
+    return Comment(value=text, **token_info)
+
+
+def create_ast_with_comments(text: str) -> Start:
+    """
+    Parses a VTL script and returns an AST with comments.
+
+    Args:
+        text (str): The VTL script to parse.
+
+    Returns:
+        AST: The generated AST with comments.
+    """
+    # Call the create_ast function to generate the AST from channel 0
+    ast = create_ast(text)
+
+    # Reading the script on channel 2 to get the comments
+    lexer_ = Lexer(InputStream(text))
+    stream = CommonTokenStream(lexer_, channel=2)
+
+    # Fill the stream with tokens on the buffer
+    stream.fill()
+
+    # Extract comments from the stream
+    comments = [generate_ast_comment(token) for token in stream.tokens if token.channel == 2]
+
+    # Add comments to the AST
+    ast.children.extend(comments)
+
+    # Sort the ast children based on their start line and column
+    ast.children.sort(key=lambda x: (x.line_start, x.column_start))
+
+    return ast
```
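A sketch of the lower-level path that `prettify` wraps: parse on the comment channel, merge the `Comment` nodes, then render. The script is illustrative:

```python
from vtlengine.AST.ASTComment import create_ast_with_comments
from vtlengine.AST.ASTString import ASTString

vtl = """/* compute the result */
DS_r <- DS_1 + DS_2; // persistent
"""
ast = create_ast_with_comments(vtl)  # Comment nodes merged, sorted by position
print(ASTString(pretty=True).render(ast))
```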