vtlengine 1.4.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. vtlengine/API/_InternalApi.py +791 -0
  2. vtlengine/API/__init__.py +612 -0
  3. vtlengine/API/data/schema/external_routines_schema.json +34 -0
  4. vtlengine/API/data/schema/json_schema_2.1.json +116 -0
  5. vtlengine/API/data/schema/value_domain_schema.json +97 -0
  6. vtlengine/AST/ASTComment.py +57 -0
  7. vtlengine/AST/ASTConstructor.py +598 -0
  8. vtlengine/AST/ASTConstructorModules/Expr.py +1928 -0
  9. vtlengine/AST/ASTConstructorModules/ExprComponents.py +995 -0
  10. vtlengine/AST/ASTConstructorModules/Terminals.py +790 -0
  11. vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
  12. vtlengine/AST/ASTDataExchange.py +10 -0
  13. vtlengine/AST/ASTEncoders.py +32 -0
  14. vtlengine/AST/ASTString.py +675 -0
  15. vtlengine/AST/ASTTemplate.py +558 -0
  16. vtlengine/AST/ASTVisitor.py +25 -0
  17. vtlengine/AST/DAG/__init__.py +479 -0
  18. vtlengine/AST/DAG/_words.py +10 -0
  19. vtlengine/AST/Grammar/Vtl.g4 +705 -0
  20. vtlengine/AST/Grammar/VtlTokens.g4 +409 -0
  21. vtlengine/AST/Grammar/__init__.py +0 -0
  22. vtlengine/AST/Grammar/lexer.py +2139 -0
  23. vtlengine/AST/Grammar/parser.py +16597 -0
  24. vtlengine/AST/Grammar/tokens.py +169 -0
  25. vtlengine/AST/VtlVisitor.py +824 -0
  26. vtlengine/AST/__init__.py +674 -0
  27. vtlengine/DataTypes/TimeHandling.py +562 -0
  28. vtlengine/DataTypes/__init__.py +863 -0
  29. vtlengine/DataTypes/_time_checking.py +135 -0
  30. vtlengine/Exceptions/__exception_file_generator.py +96 -0
  31. vtlengine/Exceptions/__init__.py +159 -0
  32. vtlengine/Exceptions/messages.py +1004 -0
  33. vtlengine/Interpreter/__init__.py +2048 -0
  34. vtlengine/Model/__init__.py +501 -0
  35. vtlengine/Operators/Aggregation.py +357 -0
  36. vtlengine/Operators/Analytic.py +455 -0
  37. vtlengine/Operators/Assignment.py +23 -0
  38. vtlengine/Operators/Boolean.py +106 -0
  39. vtlengine/Operators/CastOperator.py +451 -0
  40. vtlengine/Operators/Clause.py +366 -0
  41. vtlengine/Operators/Comparison.py +488 -0
  42. vtlengine/Operators/Conditional.py +495 -0
  43. vtlengine/Operators/General.py +191 -0
  44. vtlengine/Operators/HROperators.py +254 -0
  45. vtlengine/Operators/Join.py +447 -0
  46. vtlengine/Operators/Numeric.py +422 -0
  47. vtlengine/Operators/RoleSetter.py +77 -0
  48. vtlengine/Operators/Set.py +176 -0
  49. vtlengine/Operators/String.py +578 -0
  50. vtlengine/Operators/Time.py +1144 -0
  51. vtlengine/Operators/Validation.py +275 -0
  52. vtlengine/Operators/__init__.py +900 -0
  53. vtlengine/Utils/__Virtual_Assets.py +34 -0
  54. vtlengine/Utils/__init__.py +479 -0
  55. vtlengine/__extras_check.py +17 -0
  56. vtlengine/__init__.py +27 -0
  57. vtlengine/files/__init__.py +0 -0
  58. vtlengine/files/output/__init__.py +35 -0
  59. vtlengine/files/output/_time_period_representation.py +55 -0
  60. vtlengine/files/parser/__init__.py +240 -0
  61. vtlengine/files/parser/_rfc_dialect.py +22 -0
  62. vtlengine/py.typed +0 -0
  63. vtlengine-1.4.0rc2.dist-info/METADATA +89 -0
  64. vtlengine-1.4.0rc2.dist-info/RECORD +66 -0
  65. vtlengine-1.4.0rc2.dist-info/WHEEL +4 -0
  66. vtlengine-1.4.0rc2.dist-info/licenses/LICENSE.md +661 -0
vtlengine/API/__init__.py
@@ -0,0 +1,612 @@
+ import warnings
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Sequence, Union
+
+ import pandas as pd
+ from antlr4 import CommonTokenStream, InputStream  # type: ignore[import-untyped]
+ from antlr4.error.ErrorListener import ErrorListener  # type: ignore[import-untyped]
+ from pysdmx.io.pd import PandasDataset
+ from pysdmx.model import DataflowRef, Reference, TransformationScheme
+ from pysdmx.model.dataflow import Dataflow, Schema
+ from pysdmx.model.vtl import VtlDataflowMapping
+ from pysdmx.util import parse_urn
+
+ from vtlengine.API._InternalApi import (
+     _check_output_folder,
+     _check_script,
+     _return_only_persistent_datasets,
+     ast_to_sdmx,
+     load_datasets,
+     load_datasets_with_data,
+     load_external_routines,
+     load_value_domains,
+     load_vtl,
+     to_vtl_json,
+ )
+ from vtlengine.AST import Start
+ from vtlengine.AST.ASTConstructor import ASTVisitor
+ from vtlengine.AST.ASTString import ASTString
+ from vtlengine.AST.DAG import DAGAnalyzer
+ from vtlengine.AST.Grammar.lexer import Lexer
+ from vtlengine.AST.Grammar.parser import Parser
+ from vtlengine.Exceptions import InputValidationException
+ from vtlengine.files.output._time_period_representation import (
+     TimePeriodRepresentation,
+     format_time_period_external_representation,
+ )
+ from vtlengine.Interpreter import InterpreterAnalyzer
+ from vtlengine.Model import Dataset, Scalar
+
+ pd.options.mode.chained_assignment = None
+
+
+ class __VTLSingleErrorListener(ErrorListener):  # type: ignore[misc]
+     """Error listener that aborts parsing by raising on the first syntax error."""
+
+     def syntaxError(
+         self,
+         recognizer: Any,
+         offendingSymbol: str,
+         line: str,
+         column: str,
+         msg: str,
+         e: Any,
+     ) -> None:
+         raise Exception(
+             f"Not valid VTL Syntax \n "
+             f"offendingSymbol: {offendingSymbol} \n "
+             f"msg: {msg} \n "
+             f"line: {line}"
+         )
+
+
+ def _lexer(text: str) -> CommonTokenStream:
+     """
+     Tokenizes the VTL script into a common token stream.
+     """
+     lexer_ = Lexer(InputStream(text))
+     lexer_._listeners = [__VTLSingleErrorListener()]
+     stream = CommonTokenStream(lexer_)
+
+     return stream
+
+
+ def _parser(stream: CommonTokenStream) -> Any:
+     """
+     Parses the token stream into a parse tree.
+     """
+     vtl_parser = Parser(stream)
+     vtl_parser._listeners = [__VTLSingleErrorListener()]
+     return vtl_parser.start()
+
+
+ def _extract_input_datasets(script: Union[str, TransformationScheme, Path]) -> List[str]:
+     if isinstance(script, TransformationScheme):
+         vtl_script = _check_script(script)
+     elif isinstance(script, (str, Path)):
+         vtl_script = load_vtl(script)
+     else:
+         raise TypeError("Unsupported script type.")
+
+     ast = create_ast(vtl_script)
+     dag_inputs = DAGAnalyzer.ds_structure(ast)["global_inputs"]
+
+     return dag_inputs
+
+
+ def prettify(script: Union[str, TransformationScheme, Path]) -> str:
+     """
+     Function that prettifies the given VTL script.
+
+     Args:
+         script: VTL script as a string, a Transformation Scheme object or a Path to the VTL script.
+
+     Returns:
+         A str with the prettified VTL script.
+     """
+     from vtlengine.AST.ASTComment import create_ast_with_comments
+
+     checking = _check_script(script)
+     vtl = load_vtl(checking)
+     ast = create_ast_with_comments(vtl)
+     return ASTString(pretty=True).render(ast)
+
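A minimal usage sketch (not part of the package source; the script literal and output are illustrative):

```python
from vtlengine.API import prettify

script = "DS_r<-DS_1+DS_2;"
print(prettify(script))  # e.g. "DS_r <- DS_1 + DS_2;"
```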
+
+ def create_ast(text: str) -> Start:
+     """
+     Function that creates the AST object.
+
+     Args:
+         text: VTL expression, as a string, from which the AST object is built.
+
+     Returns:
+         The AST object.
+
+     Raises:
+         Exception: If the VTL expression has invalid syntax.
+     """
+     text = text + "\n"
+     stream = _lexer(text)
+     cst = _parser(stream)
+     visitor = ASTVisitor()
+     ast = visitor.visitStart(cst)
+     DAGAnalyzer.createDAG(ast)
+     return ast
+
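A quick sketch of building an AST from a one-line expression (the expression is illustrative):

```python
from vtlengine.API import create_ast

ast = create_ast("DS_r <- DS_1 + DS_2;")
print(type(ast).__name__)  # Start
```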
+
+ def validate_dataset(
+     data_structures: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
+     datapoints: Optional[
+         Union[Dict[str, Union[pd.DataFrame, Path, str]], List[Union[str, Path]], Path, str]
+     ] = None,
+     scalar_values: Optional[Dict[str, Optional[Union[int, str, bool, float]]]] = None,
+ ) -> None:
+     """
+     Validate that datasets can be loaded from the given data_structures and optional datapoints.
+
+     Args:
+         data_structures: Dict, Path, or List of Dict/Path objects representing data structures.
+         datapoints: Optional Dict, Path, or List of Dict/Path objects representing datapoints.
+         scalar_values: Optional Dict with scalar values to be used in the datasets.
+
+     Raises:
+         Exception: If the data structures or datapoints are invalid or cannot be loaded.
+     """
+     load_datasets_with_data(data_structures, datapoints, scalar_values)
+
+
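A minimal sketch of validate_dataset; both paths below are hypothetical and only stand in for a data-structure JSON and a CSV datapoints file:

```python
from pathlib import Path

from vtlengine.API import validate_dataset

# Raises if the structure or the datapoints cannot be loaded.
validate_dataset(
    data_structures=Path("structures/DS_1.json"),
    datapoints=Path("data/DS_1.csv"),
)
```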
+ def validate_value_domain(
+     input: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
+ ) -> None:
+     """
+     Validate ValueDomain(s) using JSON Schema.
+
+     Args:
+         input: Dict, Path, or List of Dict/Path objects representing value domain definitions.
+
+     Raises:
+         Exception: If the input file is invalid, does not exist,
+             or the JSON content does not follow the schema.
+     """
+     load_value_domains(input)
+
+
+ def validate_external_routine(
+     input: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
+ ) -> None:
+     """
+     Validate External Routine(s) using JSON Schema and SQLGlot.
+
+     Args:
+         input: Dict, Path, or List of Dict/Path objects representing external routines.
+
+     Raises:
+         Exception: If JSON Schema validation fails,
+             the SQL syntax is invalid, or the file type is wrong.
+     """
+     load_external_routines(input)
+
+
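A minimal sketch of validate_external_routine with an inline dict; the name and SQL are illustrative, and the shape follows external_routines_schema.json shown later in this diff:

```python
from vtlengine.API import validate_external_routine

routine = {
    "name": "filter_routine",  # hypothetical identifier
    "query": "SELECT Id_1, Me_1 FROM DS_1 WHERE Me_1 > 0",  # hypothetical SQL
}
validate_external_routine(routine)  # raises if JSON Schema or SQL validation fails
```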
+ def semantic_analysis(
+     script: Union[str, TransformationScheme, Path],
+     data_structures: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
+     value_domains: Optional[Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]]] = None,
+     external_routines: Optional[
+         Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]]
+     ] = None,
+ ) -> Dict[str, Dataset]:
+     """
+     Checks whether the VTL script and its related data structures are valid. For compatibility
+     with the pysdmx library, the script can also be a Transformation Scheme object, which is
+     serialized to a string VTL script before the analysis.
+
+     Concepts you may need to know:
+
+     - VTL script: The script that contains the set of operations to be executed.
+
+     - Data Structure: JSON file that describes the name and structure of each dataset \
+       (and/or scalar): the data type (String, Integer or Number), \
+       the role (Identifier, Attribute or Measure) and the nullability of each component.
+
+     - Value domains: Collection of unique values of the same data type.
+
+     - External routines: SQL query used to transform a dataset.
+
+     Args:
+         script: VTL script as a string, a Transformation Scheme object or a Path to the folder \
+             that holds the VTL script.
+
+         data_structures: Dict or Path (file or folder), \
+             or List of Dicts or Paths with the data structures JSON files.
+
+         value_domains: Dict or Path, or List of Dicts or Paths of the \
+             value domains JSON files. (default: None) It is passed as an object that can be read \
+             from a Path or from a dictionary. Furthermore, a list of those objects can be passed. \
+             Check the following example: \
+             :ref:`Example 5 <example_5_run_with_multiple_value_domains_and_external_routines>`.
+
+         external_routines: Dict or Path, or List of Dicts or Paths of the \
+             external routines JSON files. (default: None) It is passed as an object that can be \
+             read from a Path or from a dictionary. Furthermore, a list of those objects can be \
+             passed. Check the following example: \
+             :ref:`Example 5 <example_5_run_with_multiple_value_domains_and_external_routines>`.
+
+     Returns:
+         The computed datasets (structure only, since no data is evaluated during semantic
+         analysis).
+
+     Raises:
+         Exception: If the files have the wrong format, or they do not exist, \
+             or their Paths are invalid.
+     """
+
+     # AST generation
+     checking = _check_script(script)
+     vtl = load_vtl(checking)
+     ast = create_ast(vtl)
+
+     # Loading datasets
+     datasets, scalars = load_datasets(data_structures)
+
+     # Handling of library items
+     vd = None
+     if value_domains is not None:
+         vd = load_value_domains(value_domains)
+     ext_routines = None
+     if external_routines is not None:
+         ext_routines = load_external_routines(external_routines)
+
+     # Running the interpreter
+     interpreter = InterpreterAnalyzer(
+         datasets=datasets,
+         value_domains=vd,
+         external_routines=ext_routines,
+         scalars=scalars,
+         only_semantic=True,
+     )
+     result = interpreter.visit(ast)
+     return result
+
+
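A minimal sketch of semantic_analysis with an inline data-structure dict; the dict layout follows the data-structure format from the vtlengine docs, and all names and types are illustrative:

```python
from vtlengine.API import semantic_analysis

script = "DS_r <- DS_1 * 2;"
structures = {
    "datasets": [
        {
            "name": "DS_1",
            "DataStructure": [
                {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
            ],
        }
    ]
}
result = semantic_analysis(script=script, data_structures=structures)
print(result)  # structure-only results, e.g. {"DS_r": Dataset(...)}
```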
+ def run(
+     script: Union[str, TransformationScheme, Path],
+     data_structures: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
+     datapoints: Union[Dict[str, Union[pd.DataFrame, str, Path]], List[Union[str, Path]], str, Path],
+     value_domains: Optional[Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]]] = None,
+     external_routines: Optional[
+         Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]]
+     ] = None,
+     time_period_output_format: str = "vtl",
+     return_only_persistent: bool = True,
+     output_folder: Optional[Union[str, Path]] = None,
+     scalar_values: Optional[Dict[str, Optional[Union[int, str, bool, float]]]] = None,
+ ) -> Dict[str, Union[Dataset, Scalar]]:
+     """
+     Run is the main function of the ``API``, whose mission is to execute
+     the VTL operations over the data.
+
+     Concepts you may need to know:
+
+     - VTL script: The script that contains the set of operations to be executed.
+
+     - Data Structure: JSON file that describes the name and structure of each dataset \
+       (and/or scalar): the data type (String, Integer or Number), \
+       the role (Identifier, Attribute or Measure) and the nullability of each component.
+
+     - Data point: `Pandas DataFrame \
+       <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ \
+       that holds the data related to the Dataset.
+
+     - Value domains: Collection of unique values of the same data type.
+
+     - External routines: SQL query used to transform a dataset.
+
+     .. important::
+         The data structure and the data points must have the same dataset
+         name to be loaded correctly.
+
+     .. important::
+         If pointing to a Path or an S3 URI, dataset_name will be taken from the file name.
+         Example: If the path is 'path/to/data.csv', the dataset name will be 'data'.
+
+     .. important::
+         If using an S3 URI, the path must be in the format:
+
+         s3://bucket-name/path/to/data.csv
+
+         The following environment variables must be set (from the AWS account):
+
+         - AWS_ACCESS_KEY_ID
+         - AWS_SECRET_ACCESS_KEY
+
+         For more details, see
+         `s3fs documentation <https://s3fs.readthedocs.io/en/latest/index.html#credentials>`_.
+
+     Before the execution, a DAG analysis checks that the VTL script is a directed acyclic graph.
+
+     Args:
+         script: VTL script as a string, a Transformation Scheme object or Path with the VTL script.
+
+         data_structures: Dict, Path or a List of Dicts or Paths with the data structures.
+
+         datapoints: Dict, Path, S3 URI or List of S3 URIs or Paths with data. \
+             You can also use a custom name for the dataset by passing a dictionary with \
+             the dataset name as key and the Path, S3 URI or DataFrame as value. \
+             Check the following example: \
+             :ref:`Example 6 <example_6_run_using_paths>`.
+
+         value_domains: Dict or Path, or List of Dicts or Paths of the \
+             value domains JSON files. (default: None) It is passed as an object that can be read \
+             from a Path or from a dictionary. Furthermore, a list of those objects can be passed. \
+             Check the following example: \
+             :ref:`Example 5 <example_5_run_with_multiple_value_domains_and_external_routines>`.
+
+         external_routines: Dict or Path, or List of Dicts or Paths of the \
+             external routines JSON files. (default: None) It is passed as an object that can be \
+             read from a Path or from a dictionary. Furthermore, a list of those objects can be \
+             passed. Check the following example: \
+             :ref:`Example 5 <example_5_run_with_multiple_value_domains_and_external_routines>`.
+
+         time_period_output_format: String with the possible values \
+             ("sdmx_gregorian", "sdmx_reporting", "vtl") for the representation of the \
+             Time Period components. (default: "vtl")
+
+         return_only_persistent: If True, the run function will only return the results of \
+             Persistent Assignments. (default: True)
+
+         output_folder: Path or S3 URI to the output folder. (default: None)
+
+         scalar_values: Dict with the scalar values to be used in the VTL script. \
+             (default: None)
+
+     Returns:
+         The computed datasets. If an output folder is defined, the results are written \
+         there and the returned datasets carry no data.
+
+     Raises:
+         Exception: If the files have the wrong format, or they do not exist, \
+             or their Paths are invalid.
+     """
+
+     # AST generation
+     script = _check_script(script)
+     vtl = load_vtl(script)
+     ast = create_ast(vtl)
+
+     # Loading datasets and datapoints
+     datasets, scalars, path_dict = load_datasets_with_data(
+         data_structures, datapoints, scalar_values
+     )
+
+     # Handling of library items
+     vd = None
+     if value_domains is not None:
+         vd = load_value_domains(value_domains)
+     ext_routines = None
+     if external_routines is not None:
+         ext_routines = load_external_routines(external_routines)
+
+     # Checking time period output format value
+     time_period_representation = TimePeriodRepresentation.check_value(time_period_output_format)
+
+     # VTL Efficient analysis
+     ds_analysis = DAGAnalyzer.ds_structure(ast)
+
+     # Checking the output path to be a Path object to a directory
+     if output_folder is not None:
+         _check_output_folder(output_folder)
+
+     # Running the interpreter
+     interpreter = InterpreterAnalyzer(
+         datasets=datasets,
+         value_domains=vd,
+         external_routines=ext_routines,
+         ds_analysis=ds_analysis,
+         datapoints_paths=path_dict,
+         output_path=output_folder,
+         time_period_representation=time_period_representation,
+         return_only_persistent=return_only_persistent,
+         scalars=scalars,
+     )
+     result = interpreter.visit(ast)
+
+     # Applying time period output format
+     if output_folder is None:
+         for obj in result.values():
+             if isinstance(obj, (Dataset, Scalar)):
+                 format_time_period_external_representation(obj, time_period_representation)
+
+     # Returning only persistent datasets
+     if return_only_persistent:
+         return _return_only_persistent_datasets(result, ast)
+     return result
+
+
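An end-to-end sketch of run with in-memory inputs; the structure dict, DataFrame and script are illustrative, and return_only_persistent is set to False because DS_r is a regular (non-persistent) assignment:

```python
import pandas as pd

from vtlengine.API import run

script = "DS_r <- DS_1 [calc Me_2 := Me_1 * 2];"
structures = {
    "datasets": [
        {
            "name": "DS_1",
            "DataStructure": [
                {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
            ],
        }
    ]
}
data = pd.DataFrame({"Id_1": [1, 2], "Me_1": [10.0, 20.0]})

result = run(
    script=script,
    data_structures=structures,
    datapoints={"DS_1": data},  # dataset name mapped explicitly to the DataFrame
    return_only_persistent=False,
)
print(result["DS_r"].data)  # assuming the Dataset exposes its DataFrame as .data
```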
+ def run_sdmx(  # noqa: C901
+     script: Union[str, TransformationScheme, Path],
+     datasets: Sequence[PandasDataset],
+     mappings: Optional[Union[VtlDataflowMapping, Dict[str, str]]] = None,
+     value_domains: Optional[Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]]] = None,
+     external_routines: Optional[
+         Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]]
+     ] = None,
+     time_period_output_format: str = "vtl",
+     return_only_persistent: bool = True,
+     output_folder: Optional[Union[str, Path]] = None,
+ ) -> Dict[str, Union[Dataset, Scalar]]:
+     """
+     Executes a VTL script using a list of pysdmx `PandasDataset` objects.
+
+     This function prepares the required VTL data structures and datapoints from
+     the given list of pysdmx `PandasDataset` objects. It validates that each
+     `PandasDataset` uses a valid `Schema` instance as its structure. Each `Schema` is converted
+     to the appropriate VTL JSON data structure, and the pandas DataFrame is extracted.
+
+     .. important::
+         We recommend using this function in combination with the
+         `get_datasets <https://py.sdmx.io/howto/data_rw.html#pysdmx.io.get_datasets>`_
+         pysdmx method.
+
+     .. important::
+         The mapping between a pysdmx `PandasDataset
+         <https://py.sdmx.io/howto/data_rw.html#pysdmx.io.pd.PandasDataset>`_ \
+         and a VTL dataset is done using the `Schema` instance of the `PandasDataset`.
+         The Schema ID is used as the dataset name.
+
+         DataStructure=MD:TEST_DS(1.0) -> TEST_DS
+
+     The function then calls the :obj:`run <vtlengine.API>` function with the provided VTL
+     script and prepared inputs.
+
+     Before the execution, a DAG analysis checks that the generated VTL script is a directed
+     acyclic graph.
+
+     Args:
+         script: VTL script as a string, a Transformation Scheme object or Path with the VTL script.
+
+         datasets: A list of PandasDataset.
+
+         mappings: A dictionary or VtlDataflowMapping object that maps SDMX short URNs \
+             to VTL dataset names.
+
+         value_domains: Dict or Path, or List of Dicts or Paths of the \
+             value domains JSON files. (default: None) It is passed as an object that can be read \
+             from a Path or from a dictionary. Furthermore, a list of those objects can be passed. \
+             Check the following example: \
+             :ref:`Example 5 <example_5_run_with_multiple_value_domains_and_external_routines>`.
+
+         external_routines: Dict or Path, or List of Dicts or Paths of the \
+             external routines JSON files. (default: None) It is passed as an object that can be \
+             read from a Path or from a dictionary. Furthermore, a list of those objects can be \
+             passed. Check the following example: \
+             :ref:`Example 5 <example_5_run_with_multiple_value_domains_and_external_routines>`.
+
+         time_period_output_format: String with the possible values \
+             ("sdmx_gregorian", "sdmx_reporting", "vtl") for the representation of the \
+             Time Period components. (default: "vtl")
+
+         return_only_persistent: If True, the run function will only return the results of \
+             Persistent Assignments. (default: True)
+
+         output_folder: Path or S3 URI to the output folder. (default: None)
+
+     Returns:
+         The computed datasets. If an output folder is defined, the results are written \
+         there and the returned datasets carry no data.
+
+     Raises:
+         InputValidationException: If any dataset does not contain a valid `Schema` instance \
+             as its structure, or the mappings and inputs are inconsistent.
+     """
+     mapping_dict = {}
+     input_names = _extract_input_datasets(script)
+
+     if not isinstance(datasets, (list, set)) or any(
+         not isinstance(ds, PandasDataset) for ds in datasets
+     ):
+         type_ = type(datasets).__name__
+         if isinstance(datasets, (list, set)):
+             object_typing = {type(o).__name__ for o in datasets}
+             type_ = f"{type_}[{', '.join(object_typing)}]"
+         raise InputValidationException("0-1-3-7", type_=type_)
+
+     # Mapping handling
+     if mappings is None:
+         if len(datasets) != 1:
+             raise InputValidationException("0-1-3-3")
+         if len(datasets) == 1:
+             if len(input_names) != 1:
+                 raise InputValidationException("0-1-3-1", number_datasets=len(input_names))
+             schema = datasets[0].structure
+             if not isinstance(schema, Schema):
+                 raise InputValidationException("0-1-3-2", schema=schema)
+             mapping_dict = {schema.short_urn: input_names[0]}
+     elif isinstance(mappings, Dict):
+         mapping_dict = mappings
+     elif isinstance(mappings, VtlDataflowMapping):
+         if mappings.to_vtl_mapping_method is not None:
+             warnings.warn(
+                 "To_vtl_mapping_method is not implemented yet, we will use the Basic "
+                 "method with old data."
+             )
+         if mappings.from_vtl_mapping_method is not None:
+             warnings.warn(
+                 "From_vtl_mapping_method is not implemented yet, we will use the Basic "
+                 "method with old data."
+             )
+         if isinstance(mappings.dataflow, str):
+             short_urn = str(parse_urn(mappings.dataflow))
+         elif isinstance(mappings.dataflow, (Reference, DataflowRef)):
+             short_urn = str(mappings.dataflow)
+         elif isinstance(mappings.dataflow, Dataflow):
+             short_urn = mappings.dataflow.short_urn
+         else:
+             raise InputValidationException(
+                 "Expected str, Reference, DataflowRef or Dataflow type for dataflow in "
+                 "VtlDataflowMapping."
+             )
+
+         mapping_dict = {short_urn: mappings.dataflow_alias}
+     else:
+         raise InputValidationException("Expected dict or VtlDataflowMapping type for mappings.")
+
+     for vtl_name in mapping_dict.values():
+         if vtl_name not in input_names:
+             raise InputValidationException("0-1-3-5", dataset_name=vtl_name)
+
+     datapoints = {}
+     data_structures = []
+     for dataset in datasets:
+         schema = dataset.structure
+         if not isinstance(schema, Schema):
+             raise InputValidationException("0-1-3-2", schema=schema)
+         if schema.short_urn not in mapping_dict:
+             raise InputValidationException("0-1-3-4", short_urn=schema.short_urn)
+         # Generating VTL Datastructure and Datapoints.
+         dataset_name = mapping_dict[schema.short_urn]
+         vtl_structure = to_vtl_json(schema, dataset_name)
+         data_structures.append(vtl_structure)
+         datapoints[dataset_name] = dataset.data
+
+     missing = []
+     for input_name in input_names:
+         if input_name not in mapping_dict.values():
+             missing.append(input_name)
+     if missing:
+         raise InputValidationException("0-1-3-6", missing=missing)
+
+     result = run(
+         script=script,
+         data_structures=data_structures,
+         datapoints=datapoints,
+         value_domains=value_domains,
+         external_routines=external_routines,
+         time_period_output_format=time_period_output_format,
+         return_only_persistent=return_only_persistent,
+         output_folder=output_folder,
+     )
+     return result
+
+
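A sketch of feeding pysdmx datasets into run_sdmx, assuming a single input dataset whose Schema ID (TEST_DS) matches the script's input; the file paths are hypothetical and get_datasets usage should be checked against the pysdmx docs linked above:

```python
from pysdmx.io import get_datasets

from vtlengine.API import run_sdmx

script = "DS_r <- TEST_DS [filter Me_1 > 0];"
# Hypothetical SDMX-ML files; get_datasets pairs the data with its structure.
datasets = get_datasets("data/data.xml", "structures/datastructures.xml")
result = run_sdmx(script, datasets)  # with mappings=None, the Schema ID is the dataset name
```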
+ def generate_sdmx(
+     script: Union[str, Path], agency_id: str, id: str, version: str = "1.0"
+ ) -> TransformationScheme:
+     """
+     Function that generates a TransformationScheme object from a VTL script.
+
+     The TransformationScheme object is the SDMX representation of the VTL script. \
+     For more details, please check the `SDMX IM VTL objects \
+     <https://sdmx.org/wp-content/uploads/SDMX_3-0-0_SECTION_2_FINAL-1_0.pdf#page=146>`_, line 2266.
+
+     Args:
+         script: A string or Path with the VTL script.
+         agency_id: The Agency ID used in the generated `TransformationScheme` object.
+         id: The given ID of the generated `TransformationScheme` object.
+         version: The Version used in the generated `TransformationScheme` object. (default: "1.0")
+
+     Returns:
+         The generated TransformationScheme object.
+     """
+     vtl = load_vtl(script)
+     ast = create_ast(vtl)
+     result = ast_to_sdmx(ast, agency_id, id, version)
+     return result
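A short sketch of generate_sdmx; the agency ID, ID and script are illustrative:

```python
from vtlengine.API import generate_sdmx

scheme = generate_sdmx("DS_r <- DS_1 + DS_2;", agency_id="MD", id="TS1", version="1.0")
print(type(scheme).__name__)  # TransformationScheme
```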
vtlengine/API/data/schema/external_routines_schema.json
@@ -0,0 +1,34 @@
+ {
+   "$schema": "http://json-schema.org/draft-07/schema#",
+   "definitions": {
+     "sqlQuery": {
+       "type": "object",
+       "properties": {
+         "name": {
+           "type": "string",
+           "minLength": 1,
+           "description": "Identifier for the SQL query"
+         },
+         "query": {
+           "type": "string",
+           "minLength": 1,
+           "description": "SQL query statement"
+         }
+       },
+       "required": ["name", "query"],
+       "additionalProperties": false
+     }
+   },
+   "oneOf": [
+     {
+       "$ref": "#/definitions/sqlQuery"
+     },
+     {
+       "type": "array",
+       "items": {
+         "$ref": "#/definitions/sqlQuery"
+       },
+       "minItems": 1
+     }
+   ]
+ }
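For reference, a minimal document that validates against this schema (the name and query are illustrative); per the `oneOf` branch, an array of such objects is equally valid:

```json
{
  "name": "filter_routine",
  "query": "SELECT Id_1, Me_1 FROM DS_1 WHERE Me_1 > 0"
}
```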