vtlengine 1.1rc2-py3-none-any.whl → 1.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release of vtlengine has been flagged as potentially problematic.

Files changed (44)
  1. vtlengine/API/_InternalApi.py +288 -29
  2. vtlengine/API/__init__.py +277 -70
  3. vtlengine/AST/ASTComment.py +56 -0
  4. vtlengine/AST/ASTConstructor.py +71 -18
  5. vtlengine/AST/ASTConstructorModules/Expr.py +197 -75
  6. vtlengine/AST/ASTConstructorModules/ExprComponents.py +81 -38
  7. vtlengine/AST/ASTConstructorModules/Terminals.py +76 -31
  8. vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
  9. vtlengine/AST/ASTEncoders.py +4 -0
  10. vtlengine/AST/ASTString.py +622 -0
  11. vtlengine/AST/ASTTemplate.py +28 -2
  12. vtlengine/AST/DAG/__init__.py +44 -6
  13. vtlengine/AST/DAG/_words.py +1 -0
  14. vtlengine/AST/Grammar/Vtl.g4 +7 -7
  15. vtlengine/AST/Grammar/lexer.py +19759 -1112
  16. vtlengine/AST/Grammar/parser.py +17996 -3199
  17. vtlengine/AST/__init__.py +127 -14
  18. vtlengine/Exceptions/messages.py +14 -2
  19. vtlengine/Interpreter/__init__.py +90 -11
  20. vtlengine/Model/__init__.py +9 -4
  21. vtlengine/Operators/Aggregation.py +13 -6
  22. vtlengine/Operators/Analytic.py +19 -13
  23. vtlengine/Operators/CastOperator.py +5 -2
  24. vtlengine/Operators/Clause.py +26 -18
  25. vtlengine/Operators/Comparison.py +3 -1
  26. vtlengine/Operators/Conditional.py +40 -18
  27. vtlengine/Operators/General.py +3 -1
  28. vtlengine/Operators/HROperators.py +3 -1
  29. vtlengine/Operators/Join.py +4 -2
  30. vtlengine/Operators/Time.py +22 -15
  31. vtlengine/Operators/Validation.py +5 -2
  32. vtlengine/Operators/__init__.py +15 -8
  33. vtlengine/Utils/__Virtual_Assets.py +34 -0
  34. vtlengine/Utils/__init__.py +49 -0
  35. vtlengine/__init__.py +4 -2
  36. vtlengine/files/parser/__init__.py +16 -26
  37. vtlengine/files/parser/_rfc_dialect.py +1 -1
  38. vtlengine/py.typed +0 -0
  39. vtlengine-1.2.0.dist-info/METADATA +92 -0
  40. vtlengine-1.2.0.dist-info/RECORD +63 -0
  41. {vtlengine-1.1rc2.dist-info → vtlengine-1.2.0.dist-info}/WHEEL +1 -1
  42. vtlengine-1.1rc2.dist-info/METADATA +0 -248
  43. vtlengine-1.1rc2.dist-info/RECORD +0 -59
  44. {vtlengine-1.1rc2.dist-info → vtlengine-1.2.0.dist-info}/LICENSE.md +0 -0
vtlengine/API/__init__.py CHANGED
@@ -1,29 +1,41 @@
+import warnings
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Sequence, Union
 
 import pandas as pd
 from antlr4 import CommonTokenStream, InputStream  # type: ignore[import-untyped]
 from antlr4.error.ErrorListener import ErrorListener  # type: ignore[import-untyped]
+from pysdmx.io.pd import PandasDataset
+from pysdmx.model import DataflowRef, Reference, TransformationScheme
+from pysdmx.model.dataflow import Dataflow, Schema
+from pysdmx.model.vtl import VtlDataflowMapping
+from pysdmx.util import parse_urn
 
 from vtlengine.API._InternalApi import (
     _check_output_folder,
+    _check_script,
     _return_only_persistent_datasets,
+    ast_to_sdmx,
     load_datasets,
     load_datasets_with_data,
     load_external_routines,
     load_value_domains,
     load_vtl,
+    to_vtl_json,
 )
 from vtlengine.AST import Start
 from vtlengine.AST.ASTConstructor import ASTVisitor
+from vtlengine.AST.ASTString import ASTString
 from vtlengine.AST.DAG import DAGAnalyzer
 from vtlengine.AST.Grammar.lexer import Lexer
 from vtlengine.AST.Grammar.parser import Parser
+from vtlengine.Exceptions import SemanticError
 from vtlengine.files.output._time_period_representation import (
     TimePeriodRepresentation,
     format_time_period_external_representation,
 )
 from vtlengine.Interpreter import InterpreterAnalyzer
+from vtlengine.Model import Dataset, Scalar
 
 pd.options.mode.chained_assignment = None
 
@@ -68,6 +80,38 @@ def _parser(stream: CommonTokenStream) -> Any:
     return vtl_parser.start()
 
 
+def _extract_input_datasets(script: Union[str, TransformationScheme, Path]) -> List[str]:
+    if isinstance(script, TransformationScheme):
+        vtl_script = _check_script(script)
+    elif isinstance(script, (str, Path)):
+        vtl_script = load_vtl(script)
+    else:
+        raise TypeError("Unsupported script type.")
+
+    ast = create_ast(vtl_script)
+    dag_inputs = DAGAnalyzer.ds_structure(ast)["global_inputs"]
+
+    return dag_inputs
+
+
+def prettify(script: Union[str, TransformationScheme, Path]) -> str:
+    """
+    Function that prettifies the given VTL script.
+
+    Args:
+        script: VTL script as a string, a TransformationScheme object or a Path to the VTL script.
+
+    Returns:
+        A str with the prettified VTL script.
+    """
+    from vtlengine.AST.ASTComment import create_ast_with_comments
+
+    checking = _check_script(script)
+    vtl = load_vtl(checking)
+    ast = create_ast_with_comments(vtl)
+    return ASTString(pretty=True).render(ast)
+
+
 def create_ast(text: str) -> Start:
     """
     Function that creates the AST object.
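The new prettify entry point round-trips a script through the comment-aware parser and the ASTString renderer. A minimal sketch of how it could be called (the one-line VTL snippet is hypothetical; any valid script works):

    from vtlengine.API import prettify

    # Collapsed input comes back with canonical spacing and line breaks
    print(prettify("DS_r<-DS_1+DS_2;"))
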
@@ -90,30 +134,19 @@ def create_ast(text: str) -> Start:
 
 
 def semantic_analysis(
-    script: Union[str, Path],
-    data_structures: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
+    script: Union[str, TransformationScheme, Path],
+    data_structures: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
     value_domains: Optional[Union[Dict[str, Any], Path]] = None,
     external_routines: Optional[Union[Dict[str, Any], Path]] = None,
-) -> Any:
+) -> Dict[str, Dataset]:
     """
-    Checks if the vtl operation can be done. To do that, it generates the AST with the vtl script
-    given and also reviews if the data structure given can fit with it.
+    Checks that the VTL script and its related data structures are valid. For compatibility
+    with the pysdmx library, the VTL script can also be a TransformationScheme object, which
+    is serialized to a string VTL script before analysis.
 
-    This vtl script can be a string with the actual expression or a filepath to the folder
-    that contains the vtl file.
-
-    Moreover, the data structure can be a dictionary or a filepath to the folder that contains it.
-
-    If there are any value domains or external routines, this data is taken into account.
-    Both can be loaded the same way as data structures or vtl scripts are.
-
-    Finally, the :obj:`Interpreter <vtl-engine-spark.Interpreter.InterpreterAnalyzer>`
-    class takes all of this information and checks it with the ast generated to
-    return the semantic analysis result.
-
-    Concepts you may know:
+    Concepts you may need to know:
 
-    - Vtl script: The expression that shows the operation to be done.
+    - Vtl script: The script that shows the set of operations to be executed.
 
     - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
       (and/or scalar) about the datatype (String, integer or number), \
@@ -126,7 +159,8 @@ def semantic_analysis(
     This function has the following params:
 
     Args:
-        script: String or Path of the vtl expression.
+        script: VTL script as a string, a TransformationScheme object or a Path to the folder \
+            that holds the VTL script.
         data_structures: Dict or Path (file or folder), \
             or List of Dicts or Paths with the data structures JSON files.
         value_domains: Dict or Path of the value domains JSON files. (default: None)
@@ -139,12 +173,14 @@ def semantic_analysis(
         Exception: If the files have the wrong format, or they do not exist, \
             or their Paths are invalid.
     """
+
     # AST generation
-    vtl = load_vtl(script)
+    checking = _check_script(script)
+    vtl = load_vtl(checking)
     ast = create_ast(vtl)
 
     # Loading datasets
-    structures = load_datasets(data_structures)
+    datasets, scalars = load_datasets(data_structures)
 
     # Handling of library items
     vd = None
@@ -156,9 +192,10 @@ def semantic_analysis(
 
     # Running the interpreter
     interpreter = InterpreterAnalyzer(
-        datasets=structures,
+        datasets=datasets,
         value_domains=vd,
         external_routines=ext_routines,
+        scalars=scalars,
         only_semantic=True,
     )
     result = interpreter.visit(ast)
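With the 1.2.0 signature, semantic_analysis returns a Dict[str, Dataset] and tracks scalars alongside datasets. A minimal sketch of a call, assuming the VTL JSON data-structure layout documented by vtlengine (the dataset and component names are hypothetical):

    from vtlengine.API import semantic_analysis

    data_structure = {
        "datasets": [
            {
                "name": "DS_1",
                "DataStructure": [
                    {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                    {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
                ],
            }
        ]
    }

    # Computes the structure of DS_r without touching any data
    result = semantic_analysis(script="DS_r <- DS_1;", data_structures=data_structure)
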
@@ -166,30 +203,35 @@
 
 
 def run(
-    script: Union[str, Path],
-    data_structures: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
-    datapoints: Union[Dict[str, Any], str, Path, List[Union[str, Path]]],
+    script: Union[str, TransformationScheme, Path],
+    data_structures: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
+    datapoints: Union[Dict[str, pd.DataFrame], str, Path, List[Dict[str, Any]], List[Path]],
     value_domains: Optional[Union[Dict[str, Any], Path]] = None,
     external_routines: Optional[Union[str, Path]] = None,
     time_period_output_format: str = "vtl",
-    return_only_persistent: bool = False,
+    return_only_persistent: bool = True,
     output_folder: Optional[Union[str, Path]] = None,
-) -> Any:
+    scalar_values: Optional[Dict[str, Optional[Union[int, str, bool, float]]]] = None,
+) -> Dict[str, Union[Dataset, Scalar]]:
     """
-    Run is the main function of the ``API``, which mission is to ensure the vtl operation is ready
-    to be performed.
-    When the vtl expression is given, an AST object is created.
-    This vtl script can be given as a string or a path with the folder or file that contains it.
-    At the same time, data structures are loaded with its datapoints.
+    Run is the main function of the ``API``, whose mission is to execute
+    the VTL operations over the data.
+
+    Concepts you may need to know:
+
+    - Vtl script: The script that shows the set of operations to be executed.
+
+    - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
+      (and/or scalar) about the datatype (String, integer or number), \
+      the role (Identifier, Attribute or Measure) and the nullability each component has.
 
-    The data structure information is contained in the JSON file given,
-    and establish the datatype (string, integer or number),
-    and the role that each component is going to have (Identifier, Attribute or Measure).
-    It can be a dictionary or a path to the JSON file or folder that contains it.
+    - Data point: `Pandas Dataframe \
+      <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ \
+      that holds the data related to the Dataset.
 
-    Moreover, a csv file with the data to operate with is going to be loaded.
-    It can be given with a dictionary (dataset name : pandas Dataframe),
-    a path or S3 URI to the folder, path or S3 to the csv file that contains the data.
+    - Value domains: Collection of unique values on the same datatype.
+
+    - External routines: SQL query used to transform a dataset.
 
     .. important::
         The data structure and the data points must have the same dataset
@@ -212,35 +254,12 @@ def run(
         For more details, see
         `s3fs documentation <https://s3fs.readthedocs.io/en/latest/index.html#credentials>`_.
 
-    Before the execution, the DAG analysis reviews if the VTL script is a direct acyclic graphs.
-
-
-    If value domain data or external routines are required, the function loads this information
-    and integrates them into the
-    :obj:`Interpreter <vtl-engine-spark.Interpreter.InterpreterAnalyzer>` class.
-
-    Moreover, if any component has a Time Period component, the external representation
-    is passed to the Interpreter class.
-
-    Concepts you may need to know:
-
-    - Vtl script: The expression that shows the operation to be done.
-
-    - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
-      (and/or scalar) about the datatype (String, integer or number), \
-      the role (Identifier, Attribute or Measure) and the nullability each component has.
-
-    - Data point: Pointer to the data. It will be loaded as a `Pandas Dataframe \
-      <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
-
-    - Value domains: Collection of unique values that have the same datatype.
-
-    - External routines: SQL query used to transform a dataset.
+    Before the execution, the DAG analysis reviews if the VTL script is a directed acyclic graph.
 
     This function has the following params:
 
     Args:
-        script: String or Path with the vtl expression.
+        script: VTL script as a string, a TransformationScheme object or Path with the VTL script.
 
         data_structures: Dict, Path or a List of Dicts or Paths with the data structures.
 
@@ -255,10 +274,12 @@ def run(
             Time Period components.
 
         return_only_persistent: If True, run function will only return the results of \
-            Persistent Assignments. (default: False)
+            Persistent Assignments. (default: True)
 
         output_folder: Path or S3 URI to the output folder. (default: None)
 
+        scalar_values: Dict with the scalar values to be used in the VTL script. \
+            (default: None)
 
     Returns:
         The datasets are produced without data if the output folder is defined.
@@ -268,12 +289,16 @@ def run(
             or their Paths are invalid.
 
     """
+
     # AST generation
+    script = _check_script(script)
     vtl = load_vtl(script)
     ast = create_ast(vtl)
 
     # Loading datasets and datapoints
-    datasets, path_dict = load_datasets_with_data(data_structures, datapoints)
+    datasets, scalars, path_dict = load_datasets_with_data(
+        data_structures, datapoints, scalar_values
+    )
 
     # Handling of library items
     vd = None
@@ -302,15 +327,197 @@ def run(
         datapoints_paths=path_dict,
         output_path=output_folder,
         time_period_representation=time_period_representation,
+        return_only_persistent=return_only_persistent,
+        scalars=scalars,
     )
     result = interpreter.visit(ast)
 
     # Applying time period output format
     if output_folder is None:
-        for dataset in result.values():
-            format_time_period_external_representation(dataset, time_period_representation)
+        for obj in result.values():
+            if isinstance(obj, (Dataset, Scalar)):
+                format_time_period_external_representation(obj, time_period_representation)
 
     # Returning only persistent datasets
     if return_only_persistent:
         return _return_only_persistent_datasets(result, ast)
     return result
+
+
+def run_sdmx(  # noqa: C901
+    script: Union[str, TransformationScheme, Path],
+    datasets: Sequence[PandasDataset],
+    mappings: Optional[Union[VtlDataflowMapping, Dict[str, str]]] = None,
+    value_domains: Optional[Union[Dict[str, Any], Path]] = None,
+    external_routines: Optional[Union[str, Path]] = None,
+    time_period_output_format: str = "vtl",
+    return_only_persistent: bool = True,
+    output_folder: Optional[Union[str, Path]] = None,
+) -> Dict[str, Union[Dataset, Scalar]]:
+    """
+    Executes a VTL script using a list of pysdmx `PandasDataset` objects.
+
+    This function prepares the required VTL data structures and datapoints from
+    the given list of pysdmx `PandasDataset` objects. It validates that each
+    `PandasDataset` uses a valid `Schema` instance as its structure. Each `Schema` is
+    converted to the appropriate VTL JSON data structure, and the Pandas Dataframe is
+    extracted.
+
+    .. important::
+        We recommend using this function in combination with the
+        `get_datasets <https://py.sdmx.io/howto/data_rw.html#pysdmx.io.get_datasets>`_
+        pysdmx method.
+
+    .. important::
+        The mapping between pysdmx `PandasDataset
+        <https://py.sdmx.io/howto/data_rw.html#pysdmx.io.pd.PandasDataset>`_ \
+        and VTL datasets is done using the `Schema` instance of the `PandasDataset`.
+        The Schema ID is used as the dataset name.
+
+        DataStructure=MD:TEST_DS(1.0) -> TEST_DS
+
+    The function then calls the :obj:`run <vtlengine.API>` function with the provided VTL
+    script and prepared inputs.
+
+    Before the execution, the DAG analysis reviews if the generated VTL script is a directed
+    acyclic graph.
+
+    Args:
+        script: VTL script as a string, a TransformationScheme object or Path with the VTL script.
+
+        datasets: A list of PandasDataset.
+
+        mappings: A dictionary or VtlDataflowMapping object that maps the dataset names.
+
+        value_domains: Dict or Path of the value domains JSON files. (default: None)
+
+        external_routines: String or Path of the external routines SQL files. (default: None)
+
+        time_period_output_format: String with the possible values \
+            ("sdmx_gregorian", "sdmx_reporting", "vtl") for the representation of the \
+            Time Period components.
+
+        return_only_persistent: If True, run function will only return the results of \
+            Persistent Assignments. (default: True)
+
+        output_folder: Path or S3 URI to the output folder. (default: None)
+
+    Returns:
+        The datasets are produced without data if the output folder is defined.
+
+    Raises:
+        SemanticError: If any dataset does not contain a valid `Schema` instance as its structure.
+    """
+    mapping_dict = {}
+    input_names = _extract_input_datasets(script)
+
+    if not isinstance(datasets, (list, set)) or any(
+        not isinstance(ds, PandasDataset) for ds in datasets
+    ):
+        type_ = type(datasets).__name__
+        if isinstance(datasets, (list, set)):
+            object_typing = {type(o).__name__ for o in datasets}
+            type_ = f"{type_}[{', '.join(object_typing)}]"
+        raise SemanticError("0-1-3-7", type_=type_)
+
+    # Mapping handling
+    if mappings is None:
+        if len(datasets) != 1:
+            raise SemanticError("0-1-3-3")
+        if len(datasets) == 1:
+            if len(input_names) != 1:
+                raise SemanticError("0-1-3-1", number_datasets=len(input_names))
+            schema = datasets[0].structure
+            if not isinstance(schema, Schema):
+                raise SemanticError("0-1-3-2", schema=schema)
+            mapping_dict = {schema.short_urn: input_names[0]}
+    elif isinstance(mappings, Dict):
+        mapping_dict = mappings
+    elif isinstance(mappings, VtlDataflowMapping):
+        if mappings.to_vtl_mapping_method is not None:
+            warnings.warn(
+                "To_vtl_mapping_method is not implemented yet, we will use the Basic "
+                "method with old data."
+            )
+        if mappings.from_vtl_mapping_method is not None:
+            warnings.warn(
+                "From_vtl_mapping_method is not implemented yet, we will use the Basic "
+                "method with old data."
+            )
+        if isinstance(mappings.dataflow, str):
+            short_urn = str(parse_urn(mappings.dataflow))
+        elif isinstance(mappings.dataflow, (Reference, DataflowRef)):
+            short_urn = str(mappings.dataflow)
+        elif isinstance(mappings.dataflow, Dataflow):
+            short_urn = mappings.dataflow.short_urn
+        else:
+            raise TypeError(
+                "Expected str, Reference, DataflowRef or Dataflow type for dataflow in "
+                "VtlDataflowMapping."
+            )
+
+        mapping_dict = {short_urn: mappings.dataflow_alias}
+    else:
+        raise TypeError("Expected dict or VtlDataflowMapping type for mappings.")
+
+    for vtl_name in mapping_dict.values():
+        if vtl_name not in input_names:
+            raise SemanticError("0-1-3-5", dataset_name=vtl_name)
+
+    datapoints = {}
+    data_structures = []
+    for dataset in datasets:
+        schema = dataset.structure
+        if not isinstance(schema, Schema):
+            raise SemanticError("0-1-3-2", schema=schema)
+        if schema.short_urn not in mapping_dict:
+            raise SemanticError("0-1-3-4", short_urn=schema.short_urn)
+        # Generating VTL Datastructure and Datapoints.
+        dataset_name = mapping_dict[schema.short_urn]
+        vtl_structure = to_vtl_json(schema, dataset_name)
+        data_structures.append(vtl_structure)
+        datapoints[dataset_name] = dataset.data
+
+    missing = []
+    for input_name in input_names:
+        if input_name not in mapping_dict.values():
+            missing.append(input_name)
+    if missing:
+        raise SemanticError("0-1-3-6", missing=missing)
+
+    result = run(
+        script=script,
+        data_structures=data_structures,
+        datapoints=datapoints,
+        value_domains=value_domains,
+        external_routines=external_routines,
+        time_period_output_format=time_period_output_format,
+        return_only_persistent=return_only_persistent,
+        output_folder=output_folder,
+    )
+    return result
+
+
+def generate_sdmx(
+    script: Union[str, Path], agency_id: str, id: str, version: str = "1.0"
+) -> TransformationScheme:
+    """
+    Function that generates a TransformationScheme object from a VTL script.
+
+    The TransformationScheme object is the SDMX representation of the VTL script. \
+    For more details please check the `SDMX IM VTL objects \
+    <https://sdmx.org/wp-content/uploads/SDMX_3-0-0_SECTION_2_FINAL-1_0.pdf#page=146>`_, line 2266.
+
+    Args:
+        script: A string with the VTL script.
+        agency_id: The Agency ID used in the generated `TransformationScheme` object.
+        id: The given id of the generated `TransformationScheme` object.
+        version: The Version used in the generated `TransformationScheme` object. (default: "1.0")
+
+    Returns:
+        The generated TransformationScheme object.
+    """
+    vtl = load_vtl(script)
+    ast = create_ast(vtl)
+    result = ast_to_sdmx(ast, agency_id, id, version)
+    return result
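To illustrate the new SDMX-facing entry points, here is a sketch of the intended workflow under the get_datasets recommendation above. The file paths are hypothetical, and the get_datasets call assumes the pysdmx API linked in the docstring:

    from pysdmx.io import get_datasets
    from vtlengine.API import generate_sdmx, run_sdmx

    # Read SDMX data plus its structure into PandasDataset objects
    datasets = get_datasets("data.xml", "structures.xml")

    # Single dataset, single input: no explicit mapping is needed, the
    # dataset is bound to the script's only input name (TEST_DS here)
    result = run_sdmx("DS_r <- TEST_DS;", datasets=datasets)

    # The opposite direction: wrap a VTL script in a TransformationScheme
    scheme = generate_sdmx("DS_r <- TEST_DS;", agency_id="MD", id="TS1", version="1.0")
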
vtlengine/AST/ASTComment.py ADDED
@@ -0,0 +1,56 @@
+from antlr4 import CommonTokenStream, InputStream
+from antlr4.Token import CommonToken
+
+from vtlengine.API import create_ast
+from vtlengine.AST import Comment, Start
+from vtlengine.AST.ASTConstructorModules import extract_token_info
+from vtlengine.AST.Grammar.lexer import Lexer
+
+
+def generate_ast_comment(token: CommonToken) -> Comment:
+    """
+    Parses a token belonging to a comment and returns a Comment AST object.
+
+    Args:
+        token (CommonToken): The comment token to parse.
+
+    Returns:
+        Comment: A Comment AST object.
+    """
+    token_info = extract_token_info(token)
+    text = token.text
+    if token.type == Lexer.SL_COMMENT:
+        text = token.text[:-1]  # Remove the trailing newline character
+    return Comment(value=text, **token_info)
+
+
+def create_ast_with_comments(text: str) -> Start:
+    """
+    Parses a VTL script and returns an AST with comments.
+
+    Args:
+        text (str): The VTL script to parse.
+
+    Returns:
+        Start: The generated AST with comments.
+    """
+    # Call the create_ast function to generate the AST from channel 0
+    ast = create_ast(text)
+
+    # Read the script on channel 2 to get the comments
+    lexer_ = Lexer(InputStream(text))
+    stream = CommonTokenStream(lexer_, channel=2)
+
+    # Fill the stream's token buffer
+    stream.fill()
+
+    # Extract comments from the stream
+    comments = [generate_ast_comment(token) for token in stream.tokens if token.channel == 2]
+
+    # Add comments to the AST
+    ast.children.extend(comments)
+
+    # Sort the AST children based on their start line and column
+    ast.children.sort(key=lambda x: (x.line_start, x.column_start))
+
+    return ast
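A small sketch of the comment-aware parse that prettify builds on (the script literal is hypothetical):

    from vtlengine.AST.ASTComment import create_ast_with_comments

    ast = create_ast_with_comments("/* double the input */\nDS_r <- DS_1 * 2;")

    # Comment nodes are merged with the statements in source order
    print([type(child).__name__ for child in ast.children])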