vtlengine 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- vtlengine/API/_InternalApi.py +153 -100
- vtlengine/API/__init__.py +109 -67
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +8 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/lexer.py +732 -142
- vtlengine/AST/Grammar/parser.py +2188 -826
- vtlengine/AST/Grammar/tokens.py +128 -128
- vtlengine/AST/VtlVisitor.py +7 -4
- vtlengine/AST/__init__.py +22 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +194 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +52 -27
- vtlengine/Exceptions/messages.py +134 -62
- vtlengine/Interpreter/__init__.py +781 -487
- vtlengine/Model/__init__.py +165 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +115 -59
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +149 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +89 -44
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +334 -216
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +195 -40
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +77 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +46 -37
- vtlengine-1.0.1.dist-info/METADATA +236 -0
- vtlengine-1.0.1.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
vtlengine/API/__init__.py
CHANGED
@@ -1,32 +1,45 @@
 from pathlib import Path
-from typing import Any, Union, List, Optional
-
-
-from antlr4
-
-
-
-
+from typing import Any, Union, List, Optional, Dict
+
+import pandas as pd
+from antlr4 import CommonTokenStream, InputStream  # type: ignore[import-untyped]
+from antlr4.error.ErrorListener import ErrorListener  # type: ignore[import-untyped]
+
+from vtlengine.API._InternalApi import (
+    load_vtl,
+    load_datasets,
+    load_value_domains,
+    load_external_routines,
+    load_datasets_with_data,
+    _return_only_persistent_datasets,
+    _check_output_folder,
+)
 from vtlengine.AST import Start
 from vtlengine.AST.ASTConstructor import ASTVisitor
 from vtlengine.AST.DAG import DAGAnalyzer
 from vtlengine.AST.Grammar.lexer import Lexer
 from vtlengine.AST.Grammar.parser import Parser
 from vtlengine.Interpreter import InterpreterAnalyzer
-from vtlengine.files.output import
-    format_time_period_external_representation
+from vtlengine.files.output._time_period_representation import (
+    format_time_period_external_representation,
+    TimePeriodRepresentation,
+)

+pd.options.mode.chained_assignment = None

-class __VTLSingleErrorListener(ErrorListener):
-    """

-
+class __VTLSingleErrorListener(ErrorListener):  # type: ignore[misc]
+    """ """

-    def syntaxError(
-
-
-
-
+    def syntaxError(
+        self, recognizer: Any, offendingSymbol: str, line: str, column: str, msg: str, e: Any
+    ) -> None:
+        raise Exception(
+            f"Not valid VTL Syntax \n "
+            f"offendingSymbol: {offendingSymbol} \n "
+            f"msg: {msg} \n "
+            f"line: {line}"
+        )


 def _lexer(text: str) -> CommonTokenStream:
@@ -52,6 +65,15 @@ def _parser(stream: CommonTokenStream) -> Any:
 def create_ast(text: str) -> Start:
     """
     Function that creates the AST object.
+
+    Args:
+        text: Vtl string expression that will be used to create the AST object.
+
+    Returns:
+        The ast object.
+
+    Raises:
+        Exception: When the vtl syntax expression is wrong.
     """
     stream = _lexer(text)
     cst = _parser(stream)
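
The docstring added above documents create_ast's contract. A minimal usage sketch under that contract; the VTL expression is illustrative only, and the broad except clause matches the plain Exception raised by the grammar's error listener:

    from vtlengine.API import create_ast

    # Illustrative VTL script: assign DS_1 plus a scalar to a result dataset.
    script = "DS_r := DS_1 + 1;"

    try:
        ast = create_ast(script)   # returns a vtlengine.AST.Start node
        print(type(ast).__name__)  # "Start"
    except Exception as err:       # raised by the error listener on invalid syntax
        print(f"Not valid VTL: {err}")
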
@@ -61,10 +83,12 @@ def create_ast(text: str) -> Start:
     return ast


-def semantic_analysis(
-
-
-
+def semantic_analysis(
+    script: Union[str, Path],
+    data_structures: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
+    value_domains: Optional[Union[Dict[str, Any], Path]] = None,
+    external_routines: Optional[Union[Dict[str, Any], Path]] = None,
+) -> Any:
     """
     Checks if the vtl operation can be done.To do that, it generates the AST with the vtl script
     given and also reviews if the data structure given can fit with it.
@@ -85,9 +109,9 @@ def semantic_analysis(script: Union[str, Path],

     - Vtl script: The expression that shows the operation to be done.

-    - Data Structure:
-    (and/or scalar) about the datatype (String, integer or number)
-    the role (
+    - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
+    (and/or scalar) about the datatype (String, integer or number), \
+    the role (Identifier, Attribute or Measure) and the nullability each component has.

     - Value domains: Collection of unique values on the same datatype.

@@ -95,16 +119,19 @@ def semantic_analysis(script: Union[str, Path],

     This function has the following params:

-    :
-
-
-
-
-
+    Args:
+        script: String or Path of the vtl expression.
+        data_structures: Dict or Path (file or folder), \
+        or List of Dicts or Paths with the data structures JSON files.
+        value_domains: Dict or Path of the value domains JSON files. (default: None)
+        external_routines: String or Path of the external routines SQL files. (default: None)

-    :
+    Returns:
+        The computed datasets.

-    :
+    Raises:
+        Exception: If the files have the wrong format, or they do not exist, \
+        or their Paths are invalid.
     """
     # AST generation
     vtl = load_vtl(script)
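
A hedged sketch of calling semantic_analysis with the new signature; the data-structure dictionary layout (dataset name plus per-component type, role and nullability) is an assumption inferred from the docstring rather than a schema confirmed by this diff, and script.vtl is a hypothetical path:

    from pathlib import Path

    from vtlengine.API import semantic_analysis

    # Assumed structure layout: one dataset with typed, role-tagged components.
    data_structures = {
        "datasets": [
            {
                "name": "DS_1",
                "DataStructure": [
                    {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                    {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
                ],
            }
        ]
    }

    result = semantic_analysis(
        script=Path("script.vtl"),        # hypothetical path to a VTL script
        data_structures=data_structures,  # a Path (file or folder) or a list is also accepted
    )
    print(result)
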
@@ -122,19 +149,24 @@ def semantic_analysis(script: Union[str, Path],
         ext_routines = load_external_routines(external_routines)

     # Running the interpreter
-    interpreter = InterpreterAnalyzer(
-
-
-
+    interpreter = InterpreterAnalyzer(
+        datasets=structures, value_domains=vd, external_routines=ext_routines, only_semantic=True
+    )
+    with pd.option_context("future.no_silent_downcasting", True):
+        result = interpreter.visit(ast)
     return result


-def run(
-
-
-
-
-
+def run(
+    script: Union[str, Path],
+    data_structures: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
+    datapoints: Union[Dict[str, Any], str, Path, List[Union[str, Path]]],
+    value_domains: Optional[Union[Dict[str, Any], Path]] = None,
+    external_routines: Optional[Union[str, Path]] = None,
+    time_period_output_format: str = "vtl",
+    return_only_persistent: bool = False,
+    output_folder: Optional[Union[str, Path]] = None,
+) -> Any:
     """
     Run is the main function of the ``API``, which mission is to ensure the vtl operation is ready
     to be performed.
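
The semantic-analysis interpreter call is now wrapped in pd.option_context("future.no_silent_downcasting", True), and the run path below gets the same wrapper. A minimal sketch of what that pandas option changes, assuming pandas >= 2.2; this is generic pandas behaviour, not vtlengine code:

    import pandas as pd

    s = pd.Series([True, False, None], dtype=object)

    # Legacy behaviour: fillna may silently downcast the object result (here to bool),
    # and pandas >= 2.2 emits a FutureWarning while doing so.
    legacy = s.fillna(False)

    # With the option enabled, the result keeps its object dtype; any downcast has to
    # be requested explicitly (e.g. infer_objects() or astype()).
    with pd.option_context("future.no_silent_downcasting", True):
        strict = s.fillna(False)

    print(legacy.dtype, strict.dtype)  # typically: bool object
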
@@ -144,7 +176,7 @@ def run(script: Union[str, Path], data_structures: Union[dict, Path, List[Union[

     The data structure information is contained in the JSON file given,
     and establish the datatype (string, integer or number),
-    and the role that each component is going to have (Identifier or Measure).
+    and the role that each component is going to have (Identifier, Attribute or Measure).
     It can be a dictionary or a path to the JSON file or folder that contains it.

     Moreover, a csv file with the data to operate with is going to be loaded.
@@ -186,10 +218,9 @@ def run(script: Union[str, Path], data_structures: Union[dict, Path, List[Union[

     - Vtl script: The expression that shows the operation to be done.

-    - Data Structure: \
-
-
-    each component has.
+    - Data Structure: JSON file that contains the structure and the name for the dataset(s) \
+    (and/or scalar) about the datatype (String, integer or number), \
+    the role (Identifier, Attribute or Measure) and the nullability each component has.

     - Data point: Pointer to the data. It will be loaded as a `Pandas Dataframe \
     <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
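
The Data point bullet above says data is loaded as a pandas DataFrame. A hedged sketch of passing in-memory data instead of CSV paths; the dictionary keyed by dataset name relies on the Dict[str, Any] branch of the datapoints parameter and is an assumption, as is the structure dictionary reused from the earlier sketch:

    import pandas as pd

    from vtlengine.API import run

    # Assumed structure layout, as in the earlier semantic_analysis sketch.
    data_structures = {
        "datasets": [
            {
                "name": "DS_1",
                "DataStructure": [
                    {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                    {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
                ],
            }
        ]
    }

    # Assumed mapping for in-memory data: dataset name -> pandas DataFrame.
    datapoints = {"DS_1": pd.DataFrame({"Id_1": [1, 2, 3], "Me_1": [10.0, 20.0, 30.0]})}

    result = run(script="DS_r := DS_1 + 1;", data_structures=data_structures, datapoints=datapoints)
    print(result)
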
@@ -200,26 +231,33 @@ def run(script: Union[str, Path], data_structures: Union[dict, Path, List[Union[

     This function has the following params:

-    :
+    Args:
+        script: String or Path with the vtl expression.
+
+        data_structures: Dict, Path or a List of Dicts or Paths with the data structures.
+
+        datapoints: Dict, Path, S3 URI or List of S3 URIs or Paths with data.

-
+        value_domains: Dict or Path of the value domains JSON files. (default:None)

-
+        external_routines: String or Path of the external routines SQL files. (default: None)

-
+        time_period_output_format: String with the possible values \
+        ("sdmx_gregorian", "sdmx_reporting", "vtl") for the representation of the \
+        Time Period components.

-
+        return_only_persistent: If True, run function will only return the results of \
+        Persistent Assignments. (default: False)

-
-    ("sdmx_gregorian", "sdmx_reporting", "vtl") for the representation of the \
-    Time Period components.
+        output_folder: Path or S3 URI to the output folder. (default: None)

-    :param return_only_persistent: If True, run function will only return the results of \
-    Persistent Assignments. (default: False)

-    :
+    Returns:
+        The datasets are produced without data if the output folder is defined.

-    :
+    Raises:
+        Exception: If the files have the wrong format, or they do not exist, \
+        or their Paths are invalid.

     """
     # AST generation
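
Tying the documented parameters together, a hedged sketch of a run call that uses the new keyword arguments; every path below is hypothetical, and the behaviour noted in the comments follows the docstring above rather than tested output:

    from pathlib import Path

    from vtlengine.API import run

    result = run(
        script=Path("scripts/transformation.vtl"),   # hypothetical VTL script
        data_structures=Path("data_structures/"),    # folder with data-structure JSON files
        datapoints=Path("datapoints/"),              # folder with CSV data points
        time_period_output_format="sdmx_reporting",  # "sdmx_gregorian", "sdmx_reporting" or "vtl"
        return_only_persistent=True,                 # keep only persistent assignment results
        output_folder=Path("output/"),               # Path or S3 URI; results are written here
    )
    # Per the docstring, with an output folder set the returned datasets carry no data.
    print(result)
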
@@ -248,13 +286,17 @@ def run(script: Union[str, Path], data_structures: Union[dict, Path, List[Union[
         _check_output_folder(output_folder)

     # Running the interpreter
-    interpreter = InterpreterAnalyzer(
-
-
-
-
-
-
+    interpreter = InterpreterAnalyzer(
+        datasets=datasets,
+        value_domains=vd,
+        external_routines=ext_routines,
+        ds_analysis=ds_analysis,
+        datapoints_paths=path_dict,
+        output_path=output_folder,
+        time_period_representation=time_period_representation,
+    )
+    with pd.option_context("future.no_silent_downcasting", True):
+        result = interpreter.visit(ast)

     # Applying time period output format
     if output_folder is None: