vtlengine-1.0-py3-none-any.whl → vtlengine-1.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vtlengine has been flagged as potentially problematic; see the registry's advisory page for details.

Files changed (56)
  1. vtlengine/API/_InternalApi.py +159 -102
  2. vtlengine/API/__init__.py +110 -68
  3. vtlengine/AST/ASTConstructor.py +188 -98
  4. vtlengine/AST/ASTConstructorModules/Expr.py +402 -205
  5. vtlengine/AST/ASTConstructorModules/ExprComponents.py +248 -104
  6. vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
  7. vtlengine/AST/ASTEncoders.py +1 -1
  8. vtlengine/AST/ASTTemplate.py +24 -9
  9. vtlengine/AST/ASTVisitor.py +8 -12
  10. vtlengine/AST/DAG/__init__.py +43 -35
  11. vtlengine/AST/DAG/_words.py +4 -4
  12. vtlengine/AST/Grammar/Vtl.g4 +49 -20
  13. vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
  14. vtlengine/AST/Grammar/lexer.py +2012 -1312
  15. vtlengine/AST/Grammar/parser.py +7524 -4343
  16. vtlengine/AST/Grammar/tokens.py +140 -128
  17. vtlengine/AST/VtlVisitor.py +16 -5
  18. vtlengine/AST/__init__.py +41 -11
  19. vtlengine/DataTypes/NumericTypesHandling.py +5 -4
  20. vtlengine/DataTypes/TimeHandling.py +196 -301
  21. vtlengine/DataTypes/__init__.py +304 -218
  22. vtlengine/Exceptions/__init__.py +96 -27
  23. vtlengine/Exceptions/messages.py +149 -69
  24. vtlengine/Interpreter/__init__.py +817 -497
  25. vtlengine/Model/__init__.py +172 -121
  26. vtlengine/Operators/Aggregation.py +156 -95
  27. vtlengine/Operators/Analytic.py +167 -79
  28. vtlengine/Operators/Assignment.py +7 -4
  29. vtlengine/Operators/Boolean.py +27 -32
  30. vtlengine/Operators/CastOperator.py +177 -131
  31. vtlengine/Operators/Clause.py +137 -99
  32. vtlengine/Operators/Comparison.py +148 -117
  33. vtlengine/Operators/Conditional.py +290 -98
  34. vtlengine/Operators/General.py +68 -47
  35. vtlengine/Operators/HROperators.py +91 -72
  36. vtlengine/Operators/Join.py +217 -118
  37. vtlengine/Operators/Numeric.py +129 -46
  38. vtlengine/Operators/RoleSetter.py +16 -15
  39. vtlengine/Operators/Set.py +61 -36
  40. vtlengine/Operators/String.py +213 -139
  41. vtlengine/Operators/Time.py +467 -215
  42. vtlengine/Operators/Validation.py +117 -76
  43. vtlengine/Operators/__init__.py +340 -213
  44. vtlengine/Utils/__init__.py +232 -41
  45. vtlengine/__init__.py +1 -1
  46. vtlengine/files/output/__init__.py +15 -6
  47. vtlengine/files/output/_time_period_representation.py +10 -9
  48. vtlengine/files/parser/__init__.py +79 -52
  49. vtlengine/files/parser/_rfc_dialect.py +6 -5
  50. vtlengine/files/parser/_time_checking.py +48 -37
  51. vtlengine-1.0.2.dist-info/METADATA +245 -0
  52. vtlengine-1.0.2.dist-info/RECORD +58 -0
  53. {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/WHEEL +1 -1
  54. vtlengine-1.0.dist-info/METADATA +0 -104
  55. vtlengine-1.0.dist-info/RECORD +0 -58
  56. {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/LICENSE.md +0 -0
vtlengine/API/__init__.py CHANGED
@@ -1,32 +1,45 @@
1
1
  from pathlib import Path
2
- from typing import Any, Union, List, Optional
3
-
4
- from antlr4 import CommonTokenStream, InputStream
5
- from antlr4.error.ErrorListener import ErrorListener
6
-
7
- from vtlengine.API._InternalApi import load_vtl, load_datasets, load_value_domains, \
8
- load_external_routines, \
9
- load_datasets_with_data, _return_only_persistent_datasets, _check_output_folder
2
+ from typing import Any, Union, List, Optional, Dict
3
+
4
+ import pandas as pd
5
+ from antlr4 import CommonTokenStream, InputStream # type: ignore[import-untyped]
6
+ from antlr4.error.ErrorListener import ErrorListener # type: ignore[import-untyped]
7
+
8
+ from vtlengine.API._InternalApi import (
9
+ load_vtl,
10
+ load_datasets,
11
+ load_value_domains,
12
+ load_external_routines,
13
+ load_datasets_with_data,
14
+ _return_only_persistent_datasets,
15
+ _check_output_folder,
16
+ )
10
17
  from vtlengine.AST import Start
11
18
  from vtlengine.AST.ASTConstructor import ASTVisitor
12
19
  from vtlengine.AST.DAG import DAGAnalyzer
13
20
  from vtlengine.AST.Grammar.lexer import Lexer
14
21
  from vtlengine.AST.Grammar.parser import Parser
15
22
  from vtlengine.Interpreter import InterpreterAnalyzer
16
- from vtlengine.files.output import TimePeriodRepresentation, \
17
- format_time_period_external_representation
23
+ from vtlengine.files.output._time_period_representation import (
24
+ format_time_period_external_representation,
25
+ TimePeriodRepresentation,
26
+ )
18
27
 
28
+ pd.options.mode.chained_assignment = None
19
29
 
20
- class __VTLSingleErrorListener(ErrorListener):
21
- """
22
30
 
23
- """
31
+ class __VTLSingleErrorListener(ErrorListener): # type: ignore[misc]
32
+ """ """
24
33
 
25
- def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
26
- raise Exception(f"Not valid VTL Syntax \n "
27
- f"offendingSymbol: {offendingSymbol} \n "
28
- f"msg: {msg} \n "
29
- f"line: {line}")
34
+ def syntaxError(
35
+ self, recognizer: Any, offendingSymbol: str, line: str, column: str, msg: str, e: Any
36
+ ) -> None:
37
+ raise Exception(
38
+ f"Not valid VTL Syntax \n "
39
+ f"offendingSymbol: {offendingSymbol} \n "
40
+ f"msg: {msg} \n "
41
+ f"line: {line}"
42
+ )
30
43
 
31
44
 
32
45
  def _lexer(text: str) -> CommonTokenStream:
@@ -52,19 +65,30 @@ def _parser(stream: CommonTokenStream) -> Any:
52
65
  def create_ast(text: str) -> Start:
53
66
  """
54
67
  Function that creates the AST object.
68
+
69
+ Args:
70
+ text: Vtl string expression that will be used to create the AST object.
71
+
72
+ Returns:
73
+ The ast object.
74
+
75
+ Raises:
76
+ Exception: When the vtl syntax expression is wrong.
55
77
  """
56
78
  stream = _lexer(text)
57
79
  cst = _parser(stream)
58
80
  visitor = ASTVisitor()
59
- ast = visitor.visit(cst)
81
+ ast = visitor.visitStart(cst)
60
82
  DAGAnalyzer.createDAG(ast)
61
83
  return ast
62
84
 
63
85
 
64
- def semantic_analysis(script: Union[str, Path],
65
- data_structures: Union[dict, Path, List[Union[dict, Path]]],
66
- value_domains: Union[dict, Path] = None,
67
- external_routines: Union[str, Path] = None):
86
+ def semantic_analysis(
87
+ script: Union[str, Path],
88
+ data_structures: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
89
+ value_domains: Optional[Union[Dict[str, Any], Path]] = None,
90
+ external_routines: Optional[Union[Dict[str, Any], Path]] = None,
91
+ ) -> Any:
68
92
  """
69
93
  Checks if the vtl operation can be done.To do that, it generates the AST with the vtl script
70
94
  given and also reviews if the data structure given can fit with it.
@@ -85,9 +109,9 @@ def semantic_analysis(script: Union[str, Path],
85
109
 
86
110
  - Vtl script: The expression that shows the operation to be done.
87
111
 
88
- - Data Structure: Json file that contains the structure and the name for the dataset(s) \
89
- (and/or scalar) about the datatype (String, integer or number) and \
90
- the role (Measure or Identifier) each data has.
112
+ - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
113
+ (and/or scalar) about the datatype (String, integer or number), \
114
+ the role (Identifier, Attribute or Measure) and the nullability each component has.
91
115
 
92
116
  - Value domains: Collection of unique values on the same datatype.
93
117
 
@@ -95,16 +119,19 @@ def semantic_analysis(script: Union[str, Path],
95
119
 
96
120
  This function has the following params:
97
121
 
98
- :param script: String or Path of the vtl expression.
99
-
100
- :param data_structures: Dict or Path (file or folder), \
101
- or List of Dicts or Paths with the data structures JSON files.
102
-
103
- :param value_domains: Dict or Path of the value domains JSON files. (default: None)
122
+ Args:
123
+ script: String or Path of the vtl expression.
124
+ data_structures: Dict or Path (file or folder), \
125
+ or List of Dicts or Paths with the data structures JSON files.
126
+ value_domains: Dict or Path of the value domains JSON files. (default: None)
127
+ external_routines: String or Path of the external routines SQL files. (default: None)
104
128
 
105
- :param external_routines: String or Path of the external routines SQL files. (default: None)
129
+ Returns:
130
+ The computed datasets.
106
131
 
107
- :return: The computed datasets.
132
+ Raises:
133
+ Exception: If the files have the wrong format, or they do not exist, \
134
+ or their Paths are invalid.
108
135
  """
109
136
  # AST generation
110
137
  vtl = load_vtl(script)
@@ -122,19 +149,24 @@ def semantic_analysis(script: Union[str, Path],
122
149
  ext_routines = load_external_routines(external_routines)
123
150
 
124
151
  # Running the interpreter
125
- interpreter = InterpreterAnalyzer(datasets=structures, value_domains=vd,
126
- external_routines=ext_routines,
127
- only_semantic=True)
128
- result = interpreter.visit(ast)
152
+ interpreter = InterpreterAnalyzer(
153
+ datasets=structures, value_domains=vd, external_routines=ext_routines, only_semantic=True
154
+ )
155
+ with pd.option_context("future.no_silent_downcasting", True):
156
+ result = interpreter.visit(ast)
129
157
  return result
130
158
 
131
159
 
132
- def run(script: Union[str, Path], data_structures: Union[dict, Path, List[Union[dict, Path]]],
133
- datapoints: Union[dict, str, Path, List[Union[str, Path]]],
134
- value_domains: Union[dict, Path] = None, external_routines: Union[str, Path] = None,
135
- time_period_output_format: str = "vtl",
136
- return_only_persistent=False,
137
- output_folder: Optional[Union[str, Path]] = None):
160
+ def run(
161
+ script: Union[str, Path],
162
+ data_structures: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
163
+ datapoints: Union[Dict[str, Any], str, Path, List[Union[str, Path]]],
164
+ value_domains: Optional[Union[Dict[str, Any], Path]] = None,
165
+ external_routines: Optional[Union[str, Path]] = None,
166
+ time_period_output_format: str = "vtl",
167
+ return_only_persistent: bool = False,
168
+ output_folder: Optional[Union[str, Path]] = None,
169
+ ) -> Any:
138
170
  """
139
171
  Run is the main function of the ``API``, which mission is to ensure the vtl operation is ready
140
172
  to be performed.
@@ -144,7 +176,7 @@ def run(script: Union[str, Path], data_structures: Union[dict, Path, List[Union[
144
176
 
145
177
  The data structure information is contained in the JSON file given,
146
178
  and establish the datatype (string, integer or number),
147
- and the role that each component is going to have (Identifier or Measure).
179
+ and the role that each component is going to have (Identifier, Attribute or Measure).
148
180
  It can be a dictionary or a path to the JSON file or folder that contains it.
149
181
 
150
182
  Moreover, a csv file with the data to operate with is going to be loaded.
@@ -186,10 +218,9 @@ def run(script: Union[str, Path], data_structures: Union[dict, Path, List[Union[
186
218
 
187
219
  - Vtl script: The expression that shows the operation to be done.
188
220
 
189
- - Data Structure: \
190
- JSON file that contains the structure and the name for the dataset(s) (and/or scalar) \
191
- about the datatype (String, integer or number) and the role (Identifier, Attribute or Measure)
192
- each component has.
221
+ - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
222
+ (and/or scalar) about the datatype (String, integer or number), \
223
+ the role (Identifier, Attribute or Measure) and the nullability each component has.
193
224
 
194
225
  - Data point: Pointer to the data. It will be loaded as a `Pandas Dataframe \
195
226
  <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
@@ -200,26 +231,33 @@ def run(script: Union[str, Path], data_structures: Union[dict, Path, List[Union[
200
231
 
201
232
  This function has the following params:
202
233
 
203
- :param script: String or Path with the vtl expression.
234
+ Args:
235
+ script: String or Path with the vtl expression.
236
+
237
+ data_structures: Dict, Path or a List of Dicts or Paths with the data structures.
238
+
239
+ datapoints: Dict, Path, S3 URI or List of S3 URIs or Paths with data.
204
240
 
205
- :param data_structures: Dict, Path or a List of Dicts or Paths with the data structures.
241
+ value_domains: Dict or Path of the value domains JSON files. (default:None)
206
242
 
207
- :param datapoints: Dict, Path, S3 URI or List of S3 URIs or Paths with data.
243
+ external_routines: String or Path of the external routines SQL files. (default: None)
208
244
 
209
- :param value_domains: Dict or Path of the value domains JSON files. (default:None)
245
+ time_period_output_format: String with the possible values \
246
+ ("sdmx_gregorian", "sdmx_reporting", "vtl") for the representation of the \
247
+ Time Period components.
210
248
 
211
- :param external_routines: String or Path of the external routines SQL files. (default: None)
249
+ return_only_persistent: If True, run function will only return the results of \
250
+ Persistent Assignments. (default: False)
212
251
 
213
- :param time_period_output_format: String with the possible values \
214
- ("sdmx_gregorian", "sdmx_reporting", "vtl") for the representation of the \
215
- Time Period components.
252
+ output_folder: Path or S3 URI to the output folder. (default: None)
216
253
 
217
- :param return_only_persistent: If True, run function will only return the results of \
218
- Persistent Assignments. (default: False)
219
254
 
220
- :param output_folder: Path or S3 URI to the output folder. (default: None)
255
+ Returns:
256
+ The datasets are produced without data if the output folder is defined.
221
257
 
222
- :return: The datasets are produced without data if the output folder is defined.
258
+ Raises:
259
+ Exception: If the files have the wrong format, or they do not exist, \
260
+ or their Paths are invalid.
223
261
 
224
262
  """
225
263
  # AST generation
@@ -248,13 +286,17 @@ def run(script: Union[str, Path], data_structures: Union[dict, Path, List[Union[
248
286
  _check_output_folder(output_folder)
249
287
 
250
288
  # Running the interpreter
251
- interpreter = InterpreterAnalyzer(datasets=datasets, value_domains=vd,
252
- external_routines=ext_routines,
253
- ds_analysis=ds_analysis,
254
- datapoints_paths=path_dict,
255
- output_path=output_folder,
256
- time_period_representation=time_period_representation)
257
- result = interpreter.visit(ast)
289
+ interpreter = InterpreterAnalyzer(
290
+ datasets=datasets,
291
+ value_domains=vd,
292
+ external_routines=ext_routines,
293
+ ds_analysis=ds_analysis,
294
+ datapoints_paths=path_dict,
295
+ output_path=output_folder,
296
+ time_period_representation=time_period_representation,
297
+ )
298
+ with pd.option_context("future.no_silent_downcasting", True):
299
+ result = interpreter.visit(ast)
258
300
 
259
301
  # Applying time period output format
260
302
  if output_folder is None: