vtlengine 1.0.3rc3__py3-none-any.whl → 1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vtlengine/API/_InternalApi.py +288 -61
- vtlengine/API/__init__.py +269 -71
- vtlengine/API/data/schema/json_schema_2.1.json +116 -0
- vtlengine/AST/ASTComment.py +56 -0
- vtlengine/AST/ASTConstructor.py +76 -22
- vtlengine/AST/ASTConstructorModules/Expr.py +238 -120
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +126 -61
- vtlengine/AST/ASTConstructorModules/Terminals.py +97 -42
- vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- vtlengine/AST/ASTEncoders.py +5 -1
- vtlengine/AST/ASTString.py +608 -0
- vtlengine/AST/ASTTemplate.py +28 -2
- vtlengine/AST/DAG/__init__.py +10 -4
- vtlengine/AST/Grammar/lexer.py +0 -1
- vtlengine/AST/Grammar/parser.py +185 -440
- vtlengine/AST/VtlVisitor.py +0 -1
- vtlengine/AST/__init__.py +127 -14
- vtlengine/DataTypes/TimeHandling.py +50 -15
- vtlengine/DataTypes/__init__.py +79 -7
- vtlengine/Exceptions/__init__.py +3 -5
- vtlengine/Exceptions/messages.py +74 -105
- vtlengine/Interpreter/__init__.py +136 -46
- vtlengine/Model/__init__.py +14 -11
- vtlengine/Operators/Aggregation.py +17 -9
- vtlengine/Operators/Analytic.py +64 -20
- vtlengine/Operators/Assignment.py +0 -1
- vtlengine/Operators/CastOperator.py +44 -44
- vtlengine/Operators/Clause.py +16 -10
- vtlengine/Operators/Comparison.py +20 -12
- vtlengine/Operators/Conditional.py +47 -15
- vtlengine/Operators/General.py +9 -4
- vtlengine/Operators/HROperators.py +4 -14
- vtlengine/Operators/Join.py +15 -14
- vtlengine/Operators/Numeric.py +32 -26
- vtlengine/Operators/RoleSetter.py +6 -2
- vtlengine/Operators/Set.py +12 -8
- vtlengine/Operators/String.py +9 -9
- vtlengine/Operators/Time.py +145 -124
- vtlengine/Operators/Validation.py +10 -4
- vtlengine/Operators/__init__.py +56 -69
- vtlengine/Utils/__init__.py +55 -1
- vtlengine/__extras_check.py +17 -0
- vtlengine/__init__.py +2 -2
- vtlengine/files/output/__init__.py +2 -1
- vtlengine/files/output/_time_period_representation.py +2 -1
- vtlengine/files/parser/__init__.py +52 -46
- vtlengine/files/parser/_time_checking.py +4 -4
- {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1.dist-info}/METADATA +21 -17
- vtlengine-1.1.dist-info/RECORD +61 -0
- {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1.dist-info}/WHEEL +1 -1
- vtlengine/DataTypes/NumericTypesHandling.py +0 -38
- vtlengine-1.0.3rc3.dist-info/RECORD +0 -58
- {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1.dist-info}/LICENSE.md +0 -0
vtlengine/API/__init__.py
CHANGED
@@ -1,29 +1,41 @@
+import warnings
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Sequence, Union
 
 import pandas as pd
 from antlr4 import CommonTokenStream, InputStream  # type: ignore[import-untyped]
 from antlr4.error.ErrorListener import ErrorListener  # type: ignore[import-untyped]
+from pysdmx.io.pd import PandasDataset
+from pysdmx.model import DataflowRef, Reference, TransformationScheme
+from pysdmx.model.dataflow import Dataflow, Schema
+from pysdmx.model.vtl import VtlDataflowMapping
+from pysdmx.util import parse_urn
 
 from vtlengine.API._InternalApi import (
     _check_output_folder,
+    _check_script,
     _return_only_persistent_datasets,
+    ast_to_sdmx,
     load_datasets,
     load_datasets_with_data,
     load_external_routines,
     load_value_domains,
     load_vtl,
+    to_vtl_json,
 )
 from vtlengine.AST import Start
 from vtlengine.AST.ASTConstructor import ASTVisitor
+from vtlengine.AST.ASTString import ASTString
 from vtlengine.AST.DAG import DAGAnalyzer
 from vtlengine.AST.Grammar.lexer import Lexer
 from vtlengine.AST.Grammar.parser import Parser
+from vtlengine.Exceptions import SemanticError
 from vtlengine.files.output._time_period_representation import (
     TimePeriodRepresentation,
     format_time_period_external_representation,
 )
 from vtlengine.Interpreter import InterpreterAnalyzer
+from vtlengine.Model import Dataset
 
 pd.options.mode.chained_assignment = None
 
@@ -32,7 +44,13 @@ class __VTLSingleErrorListener(ErrorListener): # type: ignore[misc]
     """ """
 
     def syntaxError(
-        self,
+        self,
+        recognizer: Any,
+        offendingSymbol: str,
+        line: str,
+        column: str,
+        msg: str,
+        e: Any,
     ) -> None:
         raise Exception(
             f"Not valid VTL Syntax \n "
@@ -62,6 +80,38 @@ def _parser(stream: CommonTokenStream) -> Any:
     return vtl_parser.start()
 
 
+def _extract_input_datasets(script: Union[str, TransformationScheme, Path]) -> str:
+    if isinstance(script, TransformationScheme):
+        vtl_script = _check_script(script)
+    elif isinstance(script, (str, Path)):
+        vtl_script = load_vtl(script)
+    else:
+        raise TypeError("Unsupported script type.")
+
+    ast = create_ast(vtl_script)
+    dag_inputs = DAGAnalyzer.ds_structure(ast)["global_inputs"]
+
+    return dag_inputs
+
+
+def prettify(script: Union[str, TransformationScheme, Path]) -> str:
+    """
+    Function that prettifies the VTL script given.
+
+    Args:
+        script: VTL script as a string, a Transformation Scheme object or Path with the VTL script.
+
+    Returns:
+        A str with the prettified VTL script.
+    """
+    from vtlengine.AST.ASTComment import create_ast_with_comments
+
+    checking = _check_script(script)
+    vtl = load_vtl(checking)
+    ast = create_ast_with_comments(vtl)
+    return ASTString(pretty=True).render(ast)
+
+
 def create_ast(text: str) -> Start:
     """
     Function that creates the AST object.
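A minimal usage sketch of the new prettify entry point (the vtlengine.API import path follows this module; the script literal is illustrative). Comments survive the round trip because the prettifier parses them on a separate lexer channel (see vtlengine/AST/ASTComment.py below):

from vtlengine.API import prettify

script = "DS_r<-DS_1+DS_2; /* persistent result */ DS_j:=DS_r*2;"
print(prettify(script))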
@@ -84,30 +134,19 @@ def create_ast(text: str) -> Start:
 
 
 def semantic_analysis(
-    script: Union[str, Path],
-    data_structures: Union[Dict[str, Any], Path, List[
+    script: Union[str, TransformationScheme, Path],
+    data_structures: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
     value_domains: Optional[Union[Dict[str, Any], Path]] = None,
     external_routines: Optional[Union[Dict[str, Any], Path]] = None,
-) ->
+) -> Dict[str, Dataset]:
     """
-    Checks if the vtl
-
+    Checks if the vtl scripts and its related datastructures are valid. As part of the compatibility
+    with pysdmx library, the vtl script can be a Transformation Scheme object, which availability as
+    input is going to be serialized as a string VTL script.
 
-
-    that contains the vtl file.
-
-    Moreover, the data structure can be a dictionary or a filepath to the folder that contains it.
-
-    If there are any value domains or external routines, this data is taken into account.
-    Both can be loaded the same way as data structures or vtl scripts are.
-
-    Finally, the :obj:`Interpreter <vtl-engine-spark.Interpreter.InterpreterAnalyzer>`
-    class takes all of this information and checks it with the ast generated to
-    return the semantic analysis result.
-
-    Concepts you may know:
+    Concepts you may need to know:
 
-    - Vtl script: The
+    - Vtl script: The script that shows the set of operations to be executed.
 
     - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
     (and/or scalar) about the datatype (String, integer or number), \
@@ -120,7 +159,8 @@ def semantic_analysis(
     This function has the following params:
 
     Args:
-        script:
+        script: Vtl script as a string, Transformation Scheme object or Path to the folder \
+        that holds the vtl script.
         data_structures: Dict or Path (file or folder), \
        or List of Dicts or Paths with the data structures JSON files.
         value_domains: Dict or Path of the value domains JSON files. (default: None)
@@ -133,8 +173,10 @@ def semantic_analysis(
         Exception: If the files have the wrong format, or they do not exist, \
         or their Paths are invalid.
     """
+
     # AST generation
-
+    checking = _check_script(script)
+    vtl = load_vtl(checking)
     ast = create_ast(vtl)
 
     # Loading datasets
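A short sketch of calling the updated semantic_analysis signature. The data-structure dictionary follows the VTL JSON layout the package documents elsewhere; the names DS_1, Id_1 and Me_1 are illustrative:

from vtlengine.API import semantic_analysis

script = "DS_r := DS_1 + 1;"
data_structures = {
    "datasets": [
        {
            "name": "DS_1",
            "DataStructure": [
                {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
            ],
        }
    ]
}

# Semantic-only pass: returns a Dict[str, Dataset] describing the computed structures
result = semantic_analysis(script=script, data_structures=data_structures)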
@@ -150,38 +192,44 @@ def semantic_analysis(
 
     # Running the interpreter
     interpreter = InterpreterAnalyzer(
-        datasets=structures,
+        datasets=structures,
+        value_domains=vd,
+        external_routines=ext_routines,
+        only_semantic=True,
     )
-
-    result = interpreter.visit(ast)
+    result = interpreter.visit(ast)
     return result
 
 
 def run(
-    script: Union[str, Path],
-    data_structures: Union[Dict[str, Any], Path, List[
-    datapoints: Union[Dict[str,
+    script: Union[str, TransformationScheme, Path],
+    data_structures: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
+    datapoints: Union[Dict[str, pd.DataFrame], str, Path, List[Dict[str, Any]], List[Path]],
     value_domains: Optional[Union[Dict[str, Any], Path]] = None,
     external_routines: Optional[Union[str, Path]] = None,
     time_period_output_format: str = "vtl",
-    return_only_persistent: bool =
+    return_only_persistent: bool = True,
     output_folder: Optional[Union[str, Path]] = None,
-) ->
+) -> Dict[str, Dataset]:
     """
-    Run is the main function of the ``API``, which mission is to
-
-
-
-
+    Run is the main function of the ``API``, which mission is to execute
+    the vtl operation over the data.
+
+    Concepts you may need to know:
+
+    - Vtl script: The script that shows the set of operations to be executed.
 
-
-    and
-
-
+    - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
+    (and/or scalar) about the datatype (String, integer or number), \
+    the role (Identifier, Attribute or Measure) and the nullability each component has.
+
+    - Data point: `Pandas Dataframe \
+    <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ \
+    that holds the data related to the Dataset.
 
-
-
-
+    - Value domains: Collection of unique values on the same datatype.
+
+    - External routines: SQL query used to transform a dataset.
 
     .. important::
         The data structure and the data points must have the same dataset
@@ -204,35 +252,12 @@ def run(
         For more details, see
         `s3fs documentation <https://s3fs.readthedocs.io/en/latest/index.html#credentials>`_.
 
-    Before the execution, the DAG analysis reviews if the VTL script is a direct acyclic
-
-
-    If value domain data or external routines are required, the function loads this information
-    and integrates them into the
-    :obj:`Interpreter <vtl-engine-spark.Interpreter.InterpreterAnalyzer>` class.
-
-    Moreover, if any component has a Time Period component, the external representation
-    is passed to the Interpreter class.
-
-    Concepts you may need to know:
-
-    - Vtl script: The expression that shows the operation to be done.
-
-    - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
-    (and/or scalar) about the datatype (String, integer or number), \
-    the role (Identifier, Attribute or Measure) and the nullability each component has.
-
-    - Data point: Pointer to the data. It will be loaded as a `Pandas Dataframe \
-    <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
-
-    - Value domains: Collection of unique values that have the same datatype.
-
-    - External routines: SQL query used to transform a dataset.
+    Before the execution, the DAG analysis reviews if the VTL script is a direct acyclic graph.
 
     This function has the following params:
 
     Args:
-        script:
+        script: VTL script as a string, a Transformation Scheme object or Path with the VTL script.
 
         data_structures: Dict, Path or a List of Dicts or Paths with the data structures.
 
@@ -247,7 +272,7 @@ def run(
         Time Period components.
 
         return_only_persistent: If True, run function will only return the results of \
-        Persistent Assignments. (default:
+        Persistent Assignments. (default: True)
 
         output_folder: Path or S3 URI to the output folder. (default: None)
 
@@ -260,7 +285,9 @@ def run(
         or their Paths are invalid.
 
     """
+
     # AST generation
+    script = _check_script(script)
     vtl = load_vtl(script)
     ast = create_ast(vtl)
 
@@ -294,9 +321,9 @@ def run(
         datapoints_paths=path_dict,
         output_path=output_folder,
         time_period_representation=time_period_representation,
+        return_only_persistent=return_only_persistent,
     )
-
-    result = interpreter.visit(ast)
+    result = interpreter.visit(ast)
 
     # Applying time period output format
     if output_folder is None:
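The corresponding run call, a self-contained sketch with the same illustrative structure and an in-memory DataFrame as datapoints. With return_only_persistent=True only the persistent assignment (the <- operator) is returned:

import pandas as pd
from vtlengine.API import run

script = "DS_r <- DS_1 * 10;"
data_structures = {
    "datasets": [
        {
            "name": "DS_1",
            "DataStructure": [
                {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
            ],
        }
    ]
}
datapoints = {"DS_1": pd.DataFrame({"Id_1": [1, 2, 3], "Me_1": [10, 20, 30]})}

# Returns {"DS_r": <Dataset>}; only persistent results are kept
results = run(
    script=script,
    data_structures=data_structures,
    datapoints=datapoints,
    return_only_persistent=True,
)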
@@ -307,3 +334,174 @@ def run(
     if return_only_persistent:
         return _return_only_persistent_datasets(result, ast)
     return result
+
+
+def run_sdmx(  # noqa: C901
+    script: Union[str, TransformationScheme, Path],
+    datasets: Sequence[PandasDataset],
+    mappings: Optional[Union[VtlDataflowMapping, Dict[str, str]]] = None,
+    value_domains: Optional[Union[Dict[str, Any], Path]] = None,
+    external_routines: Optional[Union[str, Path]] = None,
+    time_period_output_format: str = "vtl",
+    return_only_persistent: bool = True,
+    output_folder: Optional[Union[str, Path]] = None,
+) -> Dict[str, Dataset]:
+    """
+    Executes a VTL script using a list of pysdmx `PandasDataset` objects.
+
+    This function prepares the required VTL data structures and datapoints from
+    the given list of pysdmx `PandasDataset` objects. It validates each
+    `PandasDataset` uses a valid `Schema` instance as its structure. Each `Schema` is converted
+    to the appropriate VTL JSON data structure, and the Pandas Dataframe is extracted.
+
+    .. important::
+        We recommend to use this function in combination with the
+        `get_datasets <https://py.sdmx.io/howto/data_rw.html#pysdmx.io.get_datasets>`_
+        pysdmx method.
+
+    .. important::
+        The mapping between pysdmx `PandasDataset
+        <https://py.sdmx.io/howto/data_rw.html#pysdmx.io.pd.PandasDataset>`_ \
+        and VTL datasets is done using the `Schema` instance of the `PandasDataset`.
+        The Schema ID is used as the dataset name.
+
+        DataStructure=MD:TEST_DS(1.0) -> TEST_DS
+
+    The function then calls the :obj:`run <vtlengine.API>` function with the provided VTL
+    script and prepared inputs.
+
+    Before the execution, the DAG analysis reviews if the generated VTL script is a direct acyclic
+    graph.
+
+    Args:
+        script: VTL script as a string, a Transformation Scheme object or Path with the VTL script.
+
+        datasets: A list of PandasDataset.
+
+        mappings: A dictionary or VtlDataflowMapping object that maps the dataset names.
+
+        value_domains: Dict or Path of the value domains JSON files. (default:None)
+
+        external_routines: String or Path of the external routines SQL files. (default: None)
+
+        time_period_output_format: String with the possible values \
+        ("sdmx_gregorian", "sdmx_reporting", "vtl") for the representation of the \
+        Time Period components.
+
+        return_only_persistent: If True, run function will only return the results of \
+        Persistent Assignments. (default: True)
+
+        output_folder: Path or S3 URI to the output folder. (default: None)
+
+    Returns:
+        The datasets are produced without data if the output folder is defined.
+
+    Raises:
+        SemanticError: If any dataset does not contain a valid `Schema` instance as its structure.
+
+    """
+    mapping_dict = {}
+    input_names = _extract_input_datasets(script)
+
+    # Mapping handling
+
+    if mappings is None:
+        if len(datasets) != 1:
+            raise SemanticError("0-1-3-3")
+        if len(datasets) == 1:
+            if len(input_names) != 1:
+                raise SemanticError("0-1-3-1", number_datasets=len(input_names))
+            schema = datasets[0].structure
+            if not isinstance(schema, Schema):
+                raise SemanticError("0-1-3-2", schema=schema)
+            mapping_dict = {schema.short_urn: input_names[0]}
+    elif isinstance(mappings, Dict):
+        mapping_dict = mappings
+    elif isinstance(mappings, VtlDataflowMapping):
+        if mappings.to_vtl_mapping_method is not None:
+            warnings.warn(
+                "To_vtl_mapping_method is not implemented yet, we will use the Basic "
+                "method with old data."
+            )
+        if mappings.from_vtl_mapping_method is not None:
+            warnings.warn(
+                "From_vtl_mapping_method is not implemented yet, we will use the Basic "
+                "method with old data."
+            )
+        if isinstance(mappings.dataflow, str):
+            short_urn = str(parse_urn(mappings.dataflow))
+        elif isinstance(mappings.dataflow, (Reference, DataflowRef)):
+            short_urn = str(mappings.dataflow)
+        elif isinstance(mappings.dataflow, Dataflow):
+            short_urn = mappings.dataflow.short_urn
+        else:
+            raise TypeError(
+                "Expected str, Reference, DataflowRef or Dataflow type for dataflow in "
+                "VtlDataflowMapping."
+            )
+
+        mapping_dict = {short_urn: mappings.dataflow_alias}
+    else:
+        raise TypeError("Expected dict or VtlDataflowMapping type for mappings.")
+
+    for vtl_name in mapping_dict.values():
+        if vtl_name not in input_names:
+            raise SemanticError("0-1-3-5", dataset_name=vtl_name)
+
+    datapoints = {}
+    data_structures = []
+    for dataset in datasets:
+        schema = dataset.structure
+        if not isinstance(schema, Schema):
+            raise SemanticError("0-1-3-2", schema=schema)
+        if schema.short_urn not in mapping_dict:
+            raise SemanticError("0-1-3-4", short_urn=schema.short_urn)
+        # Generating VTL Datastructure and Datapoints.
+        dataset_name = mapping_dict[schema.short_urn]
+        vtl_structure = to_vtl_json(schema, dataset_name)
+        data_structures.append(vtl_structure)
+        datapoints[dataset_name] = dataset.data
+
+    missing = []
+    for input_name in input_names:
+        if input_name not in mapping_dict.values():
+            missing.append(input_name)
+    if missing:
+        raise SemanticError("0-1-3-6", missing=missing)
+
+    result = run(
+        script=script,
+        data_structures=data_structures,
+        datapoints=datapoints,
+        value_domains=value_domains,
+        external_routines=external_routines,
+        time_period_output_format=time_period_output_format,
+        return_only_persistent=return_only_persistent,
+        output_folder=output_folder,
+    )
+    return result
+
+
+def generate_sdmx(
+    script: Union[str, Path], agency_id: str, id: str, version: str = "1.0"
+) -> TransformationScheme:
+    """
+    Function that generates a TransformationScheme object from a VTL script.
+
+    The TransformationScheme object is the SDMX representation of the VTL script. \
+    For more details please check the `SDMX IM VTL objects \
+    <https://sdmx.org/wp-content/uploads/SDMX_3-0-0_SECTION_2_FINAL-1_0.pdf#page=146>`_, line 2266.
+
+    Args:
+        script: A string with the VTL script.
+        agency_id: The Agency ID used in the generated `TransformationScheme` object.
+        id: The given id of the generated `TransformationScheme` object.
+        version: The Version used in the generated `TransformationScheme` object. (default: "1.0")
+
+    Returns:
+        The generated Transformation Scheme object.
+    """
+    vtl = load_vtl(script)
+    ast = create_ast(vtl)
+    result = ast_to_sdmx(ast, agency_id, id, version)
+    return result
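A sketch of the two new SDMX entry points. get_datasets is the pysdmx reader the docstring recommends; the file names and the DataStructure=MD:TEST_DS(1.0) short URN are placeholders:

from pysdmx.io import get_datasets
from vtlengine.API import generate_sdmx, run_sdmx

# Read SDMX data plus structure into PandasDataset objects (file names are placeholders)
datasets = get_datasets("data.xml", "structure.xml")

# mappings keys are Schema short URNs, values are the dataset names used in the script
results = run_sdmx(
    "DS_r <- DS_1 + 1;",
    datasets=datasets,
    mappings={"DataStructure=MD:TEST_DS(1.0)": "DS_1"},
)

# Round-trip the script into its SDMX representation
scheme = generate_sdmx("DS_r <- DS_1 + 1;", agency_id="MD", id="TS1", version="1.0")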
vtlengine/API/data/schema/json_schema_2.1.json
ADDED
@@ -0,0 +1,116 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "description": "VTL Metadata JSON serialization",
+  "$defs": {
+    "vtl-id": {
+      "type": "string",
+      "pattern": "^[a-zA-Z][a-zA-Z0-9_]*$|^'.*'$"
+    },
+    "set-type": {
+      "type": "array",
+      "uniqueItems": true,
+      "oneOf": [
+        { "items": { "oneOf": [ { "type": "string" }, { "type": "null" } ] } },
+        { "items": { "oneOf": [ { "type": "number" }, { "type": "null" } ] } }
+      ]
+    },
+    "identifiable": {
+      "type": "object",
+      "properties": {
+        "name": { "$ref": "#/$defs/vtl-id" },
+        "description": { "type": "string" }
+      },
+      "required": [ "name" ]
+    }
+  },
+  "type": "object",
+  "properties": {
+    "datasets": {
+      "type": "array",
+      "items": {
+        "allOf": [ { "$ref": "#/$defs/identifiable" } ],
+        "properties": {
+          "source": { "type": "string" },
+          "structure": { "$ref": "#/$defs/vtl-id" }
+        },
+        "required": [ "structure" ]
+      }
+    },
+    "structures": {
+      "type": "array",
+      "items": {
+        "allOf": [ { "$ref": "#/$defs/identifiable" } ],
+        "properties": {
+          "components": {
+            "type": "array",
+            "items": {
+              "allOf": [ { "$ref": "#/$defs/identifiable" } ],
+              "properties": {
+                "role": {
+                  "type": "string",
+                  "enum": [ "Identifier", "Measure", "Attribute", "Viral Attribute" ]
+                },
+                "subset": { "$ref": "#/$defs/vtl-id" },
+                "nullable": { "type": "boolean" },
+                "data_type": {
+                  "type": "string",
+                  "enum": [ "String", "Number", "Integer", "Boolean", "Time", "TimePeriod", "Date", "Duration" ]
+                }
+              },
+              "required": [ "role" ]
+            }
+          }
+        },
+        "required": [ "components" ]
+      }
+    },
+    "variables": {
+      "type": "array",
+      "items": {
+        "allOf": [ { "$ref": "#/$defs/identifiable" } ],
+        "properties": {
+          "domain": { "$ref": "#/$defs/vtl-id" }
+        },
+        "required": [ "domain" ]
+      }
+    },
+    "domains": {
+      "type": "array",
+      "items": {
+        "allOf": [ { "$ref": "#/$defs/identifiable" } ],
+        "unevaluatedProperties": false,
+        "oneOf": [
+          {
+            "properties": {
+              "externalRef": { "type": "string" }
+            },
+            "required": [ "externalRef" ]
+          }, {
+            "properties": {
+              "parent": { "$ref": "#/$defs/vtl-id" }
+            },
+            "required": [ "parent" ],
+            "oneOf": [{
+              "properties": {
+                "restriction": { "$ref": "#/$defs/set-type" }
+              },
+              "required": [ "restriction" ]
+            }, {
+              "properties": {
+                "enumerated": { "$ref": "#/$defs/set-type" }
+              },
+              "required": [ "enumerated" ]
+            }, {
+              "properties": {
+                "described": { "type": "string" }
+              },
+              "required": [ "described" ]
+            }
+            ]
+          }
+        ]
+      }
+    }
+  }
+}
+
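An illustrative metadata document that validates against the schema above (dataset, structure and component names are made up): each datasets entry must name a structure, and each structure component needs at least a name and a role:

{
  "datasets": [
    { "name": "DS_1", "structure": "STR_1" }
  ],
  "structures": [
    {
      "name": "STR_1",
      "components": [
        { "name": "Id_1", "role": "Identifier", "data_type": "Integer", "nullable": false },
        { "name": "Me_1", "role": "Measure", "data_type": "Number", "nullable": true }
      ]
    }
  ]
}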
vtlengine/AST/ASTComment.py
ADDED
@@ -0,0 +1,56 @@
+from antlr4 import CommonTokenStream, InputStream
+from antlr4.Token import CommonToken
+
+from vtlengine.API import create_ast
+from vtlengine.AST import Comment, Start
+from vtlengine.AST.ASTConstructorModules import extract_token_info
+from vtlengine.AST.Grammar.lexer import Lexer
+
+
+def generate_ast_comment(token: CommonToken) -> Comment:
+    """
+    Parses a token belonging to a comment and returns a Comment AST object.
+
+    Args:
+        token (str): The comment string to parse.
+
+    Returns:
+        Comment: A Comment AST object.
+    """
+    token_info = extract_token_info(token)
+    text = token.text
+    if token.type == Lexer.SL_COMMENT:
+        text = token.text[:-1]  # Remove the trailing newline character
+    return Comment(value=text, **token_info)
+
+
+def create_ast_with_comments(text: str) -> Start:
+    """
+    Parses a VTL script and returns an AST with comments.
+
+    Args:
+        text (str): The VTL script to parse.
+
+    Returns:
+        AST: The generated AST with comments.
+    """
+    # Call the create_ast function to generate the AST from channel 0
+    ast = create_ast(text)
+
+    # Reading the script on channel 2 to get the comments
+    lexer_ = Lexer(InputStream(text))
+    stream = CommonTokenStream(lexer_, channel=2)
+
+    # Fill the stream with tokens on the buffer
+    stream.fill()
+
+    # Extract comments from the stream
+    comments = [generate_ast_comment(token) for token in stream.tokens if token.channel == 2]
+
+    # Add comments to the AST
+    ast.children.extend(comments)
+
+    # Sort the ast children based on their start line and column
+    ast.children.sort(key=lambda x: (x.line_start, x.column_start))
+
+    return ast
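A small sketch of the comment-aware parse this module enables (the script content is illustrative): comments are lexed on channel 2, turned into Comment nodes, and merged into the AST in source order, which is what lets prettify re-emit them:

from vtlengine.AST.ASTComment import create_ast_with_comments

script = """/* input scaling */
DS_r <- DS_1 * 10; // persistent result
"""
ast = create_ast_with_comments(script)

# Comment nodes are interleaved with statements, sorted by (line_start, column_start)
for node in ast.children:
    print(type(node).__name__, getattr(node, "value", ""))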