vtlengine 1.0.0__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine-1.0.1/PKG-INFO +236 -0
- vtlengine-1.0.1/README.md +206 -0
- {vtlengine-1.0 → vtlengine-1.0.1}/pyproject.toml +26 -1
- vtlengine-1.0.1/src/vtlengine/API/_InternalApi.py +378 -0
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/API/__init__.py +109 -67
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/ASTConstructor.py +188 -98
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/ASTEncoders.py +1 -1
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/ASTTemplate.py +8 -9
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/ASTVisitor.py +8 -12
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine-1.0.1/src/vtlengine/AST/DAG/_words.py +9 -0
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/Grammar/lexer.py +732 -142
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/Grammar/parser.py +2188 -826
- vtlengine-1.0.1/src/vtlengine/AST/Grammar/tokens.py +157 -0
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/VtlVisitor.py +7 -4
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/__init__.py +22 -11
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine-1.0.1/src/vtlengine/DataTypes/TimeHandling.py +517 -0
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/DataTypes/__init__.py +304 -218
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Exceptions/__init__.py +52 -27
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Exceptions/messages.py +134 -62
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Interpreter/__init__.py +781 -487
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Model/__init__.py +165 -121
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/Aggregation.py +156 -95
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/Analytic.py +115 -59
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/Assignment.py +7 -4
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/Boolean.py +27 -32
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/CastOperator.py +177 -131
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/Clause.py +137 -99
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/Comparison.py +148 -117
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/Conditional.py +149 -98
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/General.py +68 -47
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/HROperators.py +91 -72
- vtlengine-1.0.1/src/vtlengine/Operators/Join.py +443 -0
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/Numeric.py +89 -44
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/RoleSetter.py +16 -15
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/Set.py +61 -36
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/String.py +213 -139
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/Time.py +334 -216
- vtlengine-1.0.1/src/vtlengine/Operators/Validation.py +241 -0
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Operators/__init__.py +340 -213
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/Utils/__init__.py +195 -40
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/__init__.py +1 -1
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/files/output/__init__.py +15 -6
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/files/output/_time_period_representation.py +10 -9
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/files/parser/__init__.py +77 -52
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/files/parser/_rfc_dialect.py +6 -5
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/files/parser/_time_checking.py +46 -37
- vtlengine-1.0/PKG-INFO +0 -104
- vtlengine-1.0/README.md +0 -74
- vtlengine-1.0/src/vtlengine/API/_InternalApi.py +0 -325
- vtlengine-1.0/src/vtlengine/AST/DAG/_words.py +0 -9
- vtlengine-1.0/src/vtlengine/AST/Grammar/tokens.py +0 -157
- vtlengine-1.0/src/vtlengine/DataTypes/TimeHandling.py +0 -624
- vtlengine-1.0/src/vtlengine/Operators/Join.py +0 -344
- vtlengine-1.0/src/vtlengine/Operators/Validation.py +0 -200
- {vtlengine-1.0 → vtlengine-1.0.1}/LICENSE.md +0 -0
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/ASTConstructorModules/__init__.py +0 -0
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/ASTDataExchange.py +0 -0
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/Grammar/Vtl.g4 +0 -0
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/Grammar/VtlTokens.g4 +0 -0
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/AST/Grammar/__init__.py +0 -0
- {vtlengine-1.0 → vtlengine-1.0.1}/src/vtlengine/files/__init__.py +0 -0
vtlengine-1.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: vtlengine
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: Run and Validate VTL Scripts
|
|
5
|
+
License: AGPL-3.0
|
|
6
|
+
Author: MeaningfulData
|
|
7
|
+
Author-email: info@meaningfuldata.eu
|
|
8
|
+
Requires-Python: >=3.10,<4.0
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Information Technology
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: GNU Affero General Public License v3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Requires-Dist: antlr4-python3-runtime (==4.9.2)
|
|
20
|
+
Requires-Dist: bottleneck (>=1.3.4,<2.0.0)
|
|
21
|
+
Requires-Dist: duckdb (>=1.1.1,<2.0.0)
|
|
22
|
+
Requires-Dist: networkx (>=2.8.8,<3.0.0)
|
|
23
|
+
Requires-Dist: numba (>=0.60.0,<0.61.0)
|
|
24
|
+
Requires-Dist: numexpr (>=2.9.0,<3.0.0)
|
|
25
|
+
Requires-Dist: pandas (>=2.1.4,<3.0.0)
|
|
26
|
+
Requires-Dist: pyarrow (>=17.0.0,<18.0.0)
|
|
27
|
+
Requires-Dist: s3fs (>=2024.9.0,<2025.0.0)
|
|
28
|
+
Requires-Dist: sqlglot (>=22.2.0,<23.0.0)
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# VTL Engine
|
|
32
|
+
|
|
33
|
+
## Introduction
|
|
34
|
+
|
|
35
|
+
The VTL Engine is a Python library for validating and running VTL scripts.
|
|
36
|
+
|
|
37
|
+
It is a Python-based library around the [VTL Language](http://sdmx.org/?page_id=5096).
|
|
38
|
+
|
|
39
|
+
## Installation
|
|
40
|
+
|
|
41
|
+
### Requirements
|
|
42
|
+
|
|
43
|
+
The VTL Engine requires Python 3.10 or higher.
|
|
44
|
+
|
|
45
|
+
### Install with pip
|
|
46
|
+
|
|
47
|
+
To install the VTL Engine on any Operating System, you can use pip:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
|
|
51
|
+
pip install vtlengine
|
|
52
|
+
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
*Note: it is recommended to install the VTL Engine in a virtual environment.*
|
|
56
|
+
|
|
57
|
+
## Usage
|
|
58
|
+
|
|
59
|
+
The VTL Engine API implements two basic methods:
|
|
60
|
+
|
|
61
|
+
* **Semantic Analysis**: aimed at validating the correctness of a script and computing the data
|
|
62
|
+
structures of the data sets created in the script.
|
|
63
|
+
* **Run**: aimed at executing the provided script on the provided input datasets.
|
|
64
|
+
|
|
65
|
+
Any action with VTL requires the following elements as input:
|
|
66
|
+
|
|
67
|
+
* **VTL Script**: Is the VTL to be executed, which includes the transformation scheme, as well as the
|
|
68
|
+
User Defined Operators, Hierarchical Rulesets and Datapoint Rulesets. It is provided as a string
|
|
69
|
+
or as a Path object to a vtl file.
|
|
70
|
+
* **Data structures** : Provides the structure of the input artifacts of the VTL script, according to
|
|
71
|
+
the VTL Information model. Given that the current version doesn't prescribe a standard format for
|
|
72
|
+
providing the information, the VTL Engine is implementing a JSON format that can be found here.
|
|
73
|
+
Data Structures can be provided as Dictionaries or as Paths to JSON files. It is possible to have multiple data structure files.
|
|
74
|
+
* **External routines**: The VTL Engine allows using SQL (SQLite) with the eval operator. Can be
|
|
75
|
+
provided as a string with the SQL or as a path object to an SQL file. Its default value is `None`,
|
|
76
|
+
which shall be used if external routines are not applicable to the VTL script.
|
|
77
|
+
* **Value domains**: Provides the value domains that are used in the VTL script, normally with an `in`
|
|
78
|
+
operator. Can be provided as a dictionary or as a path to a JSON file. Its default value
|
|
79
|
+
is `None`, which shall be used if value domains are not applicable to the VTL script.
|
|
80
|
+
|
|
81
|
+
### Semantic Analysis
|
|
82
|
+
|
|
83
|
+
The `semantic_analysis` method serves to validate the correctness of a VTL script, as well as to
|
|
84
|
+
calculate the data structures of the datasets generated by the VTL script itself (that calculation
|
|
85
|
+
is a pre-requisite for the semantic analysis).
|
|
86
|
+
|
|
87
|
+
* If the VTL script is correct, the method returns a dictionary with the data structures of all the
|
|
88
|
+
datasets generated by the script.
|
|
89
|
+
* If the VTL script is incorrect, it raises a VTL Engine custom error explaining the error.
|
|
90
|
+
|
|
91
|
+
#### Example 1: Correct VTL
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from vtlengine import semantic_analysis
|
|
95
|
+
|
|
96
|
+
script = """
|
|
97
|
+
DS_A := DS_1 * 10;
|
|
98
|
+
"""
|
|
99
|
+
|
|
100
|
+
data_structures = {
|
|
101
|
+
'datasets': [
|
|
102
|
+
{'name': 'DS_1',
|
|
103
|
+
'DataStructure': [
|
|
104
|
+
{'name': 'Id_1',
|
|
105
|
+
'type':
|
|
106
|
+
'Integer',
|
|
107
|
+
'role': 'Identifier',
|
|
108
|
+
'nullable': False},
|
|
109
|
+
{'name': 'Me_1',
|
|
110
|
+
'type': 'Number',
|
|
111
|
+
'role': 'Measure',
|
|
112
|
+
'nullable': True}
|
|
113
|
+
]
|
|
114
|
+
}
|
|
115
|
+
]
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
sa_result = semantic_analysis(script=script, data_structures=data_structures)
|
|
119
|
+
|
|
120
|
+
print(sa_result)
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Returns:
|
|
125
|
+
|
|
126
|
+
```
|
|
127
|
+
{'DS_A': Dataset(name='DS_A', components={'Id_1': Component(name='Id_1', data_type=<class 'vtlengine.DataTypes.Integer'>, role=<Role.IDENTIFIER: 'Identifier'>, nullable=False), 'Me_1': Component(name='Me_1', data_type=<class 'vtlengine.DataTypes.Number'>, role=<Role.MEASURE: 'Measure'>, nullable=True)}, data=None)}
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
#### Example 2: Incorrect VTL
|
|
131
|
+
|
|
132
|
+
Note that, as compared to Example 1, the only change is that Me_1 is of the String data type,
|
|
133
|
+
instead of Number.
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
from vtlengine import semantic_analysis
|
|
137
|
+
|
|
138
|
+
script = """
|
|
139
|
+
DS_A := DS_1 * 10;
|
|
140
|
+
"""
|
|
141
|
+
|
|
142
|
+
data_structures = {
|
|
143
|
+
'datasets': [
|
|
144
|
+
{'name': 'DS_1',
|
|
145
|
+
'DataStructure': [
|
|
146
|
+
{'name': 'Id_1',
|
|
147
|
+
'type':
|
|
148
|
+
'Integer',
|
|
149
|
+
'role': 'Identifier',
|
|
150
|
+
'nullable': False},
|
|
151
|
+
{'name': 'Me_1',
|
|
152
|
+
'type': 'String',
|
|
153
|
+
'role': 'Measure',
|
|
154
|
+
'nullable': True}
|
|
155
|
+
]
|
|
156
|
+
}
|
|
157
|
+
]
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
sa_result = semantic_analysis(script=script, data_structures=data_structures)
|
|
161
|
+
|
|
162
|
+
print(sa_result)
|
|
163
|
+
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
Will raise the following Error:
|
|
167
|
+
|
|
168
|
+
``` python
|
|
169
|
+
raise SemanticError(code="1-1-1-2",
|
|
170
|
+
vtlengine.Exceptions.SemanticError: ('Invalid implicit cast from String and Integer to Number.', '1-1-1-2')
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### Run VTL Scripts
|
|
174
|
+
|
|
175
|
+
The `run` method serves to execute a VTL script with input datapoints.
|
|
176
|
+
|
|
177
|
+
Returns a dictionary with all the generated Datasets.
|
|
178
|
+
When the output parameter is set, the engine will write the result of the computation to the output
|
|
179
|
+
folder, else it will include the data in the dictionary of the computed datasets.
|
|
180
|
+
|
|
181
|
+
Two validations are performed before running, which can raise errors:
|
|
182
|
+
|
|
183
|
+
* Semantic analysis: Equivalent to running the `semantic_analysis` method
|
|
184
|
+
* Data load analysis: Basic check of the data structure (names and types)
|
|
185
|
+
|
|
186
|
+
#### Example 3: Simple run
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
from vtlengine import run
|
|
190
|
+
import pandas as pd
|
|
191
|
+
|
|
192
|
+
script = """
|
|
193
|
+
DS_A := DS_1 * 10;
|
|
194
|
+
"""
|
|
195
|
+
|
|
196
|
+
data_structures = {
|
|
197
|
+
'datasets': [
|
|
198
|
+
{'name': 'DS_1',
|
|
199
|
+
'DataStructure': [
|
|
200
|
+
{'name': 'Id_1',
|
|
201
|
+
'type':
|
|
202
|
+
'Integer',
|
|
203
|
+
'role': 'Identifier',
|
|
204
|
+
'nullable': False},
|
|
205
|
+
{'name': 'Me_1',
|
|
206
|
+
'type': 'Number',
|
|
207
|
+
'role': 'Measure',
|
|
208
|
+
'nullable': True}
|
|
209
|
+
]
|
|
210
|
+
}
|
|
211
|
+
]
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
data_df = pd.DataFrame(
|
|
215
|
+
{"Id_1": [1, 2, 3],
|
|
216
|
+
"Me_1": [10, 20, 30]})
|
|
217
|
+
|
|
218
|
+
datapoints = {"DS_1": data_df}
|
|
219
|
+
|
|
220
|
+
run_result = run(script=script, data_structures=data_structures,
|
|
221
|
+
datapoints=datapoints)
|
|
222
|
+
|
|
223
|
+
print(run_result)
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
Returns:
|
|
227
|
+
|
|
228
|
+
``` python
|
|
229
|
+
{'DS_A': Dataset(name='DS_A', components={'Id_1': Component(name='Id_1', data_type=<class 'vtlengine.DataTypes.Integer'>, role=<Role.IDENTIFIER: 'Identifier'>, nullable=False), 'Me_1': Component(name='Me_1', data_type=<class 'vtlengine.DataTypes.Number'>, role=<Role.MEASURE: 'Measure'>, nullable=True)}, data= Id_1 Me_1
|
|
230
|
+
0 1 100.0
|
|
231
|
+
1 2 200.0
|
|
232
|
+
2 3 300.0)}
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
For more information on usage, please refer to
|
|
236
|
+
the [API documentation](https://docs.vtlengine.meaningfuldata.eu/api.html).
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
# VTL Engine
|
|
2
|
+
|
|
3
|
+
## Introduction
|
|
4
|
+
|
|
5
|
+
The VTL Engine is a Python library for validating and running VTL scripts.
|
|
6
|
+
|
|
7
|
+
It is a Python-based library around the [VTL Language](http://sdmx.org/?page_id=5096).
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
### Requirements
|
|
12
|
+
|
|
13
|
+
The VTL Engine requires Python 3.10 or higher.
|
|
14
|
+
|
|
15
|
+
### Install with pip
|
|
16
|
+
|
|
17
|
+
To install the VTL Engine on any Operating System, you can use pip:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
|
|
21
|
+
pip install vtlengine
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
*Note: it is recommended to install the VTL Engine in a virtual environment.*
|
|
26
|
+
|
|
27
|
+
## Usage
|
|
28
|
+
|
|
29
|
+
The VTL Engine API implements two basic methods:
|
|
30
|
+
|
|
31
|
+
* **Semantic Analysis**: aimed at validating the correctness of a script and computing the data
|
|
32
|
+
structures of the data sets created in the script.
|
|
33
|
+
* **Run**: aimed at executing the provided script on the provided input datasets.
|
|
34
|
+
|
|
35
|
+
Any action with VTL requires the following elements as input:
|
|
36
|
+
|
|
37
|
+
* **VTL Script**: Is the VTL to be executed, which includes the transformation scheme, as well as the
|
|
38
|
+
User Defined Operators, Hierarchical Rulesets and Datapoint Rulesets. It is provided as a string
|
|
39
|
+
or as a Path object to a vtl file.
|
|
40
|
+
* **Data structures** : Provides the structure of the input artifacts of the VTL script, according to
|
|
41
|
+
the VTL Information model. Given that the current version doesn't prescribe a standard format for
|
|
42
|
+
providing the information, the VTL Engine is implementing a JSON format that can be found here.
|
|
43
|
+
Data Structures can be provided as Dictionaries or as Paths to JSON files. It is possible to have multiple data structure files.
|
|
44
|
+
* **External routines**: The VTL Engine allows using SQL (SQLite) with the eval operator. Can be
|
|
45
|
+
provided as a string with the SQL or as a path object to an SQL file. Its default value is `None`,
|
|
46
|
+
which shall be used if external routines are not applicable to the VTL script.
|
|
47
|
+
* **Value domains**: Provides the value domains that are used in the VTL script, normally with an `in`
|
|
48
|
+
operator. Can be provided as a dictionary or as a path to a JSON file. Its default value
|
|
49
|
+
is `None`, which shall be used if value domains are not applicable to the VTL script.
|
|
50
|
+
|
|
51
|
+
### Semantic Analysis
|
|
52
|
+
|
|
53
|
+
The `semantic_analysis` method serves to validate the correctness of a VTL script, as well as to
|
|
54
|
+
calculate the data structures of the datasets generated by the VTL script itself (that calculation
|
|
55
|
+
is a pre-requisite for the semantic analysis).
|
|
56
|
+
|
|
57
|
+
* If the VTL script is correct, the method returns a dictionary with the data structures of all the
|
|
58
|
+
datasets generated by the script.
|
|
59
|
+
* If the VTL script is incorrect, it raises a VTL Engine custom error explaining the error.
|
|
60
|
+
|
|
61
|
+
#### Example 1: Correct VTL
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from vtlengine import semantic_analysis
|
|
65
|
+
|
|
66
|
+
script = """
|
|
67
|
+
DS_A := DS_1 * 10;
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
data_structures = {
|
|
71
|
+
'datasets': [
|
|
72
|
+
{'name': 'DS_1',
|
|
73
|
+
'DataStructure': [
|
|
74
|
+
{'name': 'Id_1',
|
|
75
|
+
'type':
|
|
76
|
+
'Integer',
|
|
77
|
+
'role': 'Identifier',
|
|
78
|
+
'nullable': False},
|
|
79
|
+
{'name': 'Me_1',
|
|
80
|
+
'type': 'Number',
|
|
81
|
+
'role': 'Measure',
|
|
82
|
+
'nullable': True}
|
|
83
|
+
]
|
|
84
|
+
}
|
|
85
|
+
]
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
sa_result = semantic_analysis(script=script, data_structures=data_structures)
|
|
89
|
+
|
|
90
|
+
print(sa_result)
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Returns:
|
|
95
|
+
|
|
96
|
+
```
|
|
97
|
+
{'DS_A': Dataset(name='DS_A', components={'Id_1': Component(name='Id_1', data_type=<class 'vtlengine.DataTypes.Integer'>, role=<Role.IDENTIFIER: 'Identifier'>, nullable=False), 'Me_1': Component(name='Me_1', data_type=<class 'vtlengine.DataTypes.Number'>, role=<Role.MEASURE: 'Measure'>, nullable=True)}, data=None)}
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
#### Example 2: Incorrect VTL
|
|
101
|
+
|
|
102
|
+
Note that, as compared to Example 1, the only change is that Me_1 is of the String data type,
|
|
103
|
+
instead of Number.
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from vtlengine import semantic_analysis
|
|
107
|
+
|
|
108
|
+
script = """
|
|
109
|
+
DS_A := DS_1 * 10;
|
|
110
|
+
"""
|
|
111
|
+
|
|
112
|
+
data_structures = {
|
|
113
|
+
'datasets': [
|
|
114
|
+
{'name': 'DS_1',
|
|
115
|
+
'DataStructure': [
|
|
116
|
+
{'name': 'Id_1',
|
|
117
|
+
'type':
|
|
118
|
+
'Integer',
|
|
119
|
+
'role': 'Identifier',
|
|
120
|
+
'nullable': False},
|
|
121
|
+
{'name': 'Me_1',
|
|
122
|
+
'type': 'String',
|
|
123
|
+
'role': 'Measure',
|
|
124
|
+
'nullable': True}
|
|
125
|
+
]
|
|
126
|
+
}
|
|
127
|
+
]
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
sa_result = semantic_analysis(script=script, data_structures=data_structures)
|
|
131
|
+
|
|
132
|
+
print(sa_result)
|
|
133
|
+
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Will raise the following Error:
|
|
137
|
+
|
|
138
|
+
``` python
|
|
139
|
+
raise SemanticError(code="1-1-1-2",
|
|
140
|
+
vtlengine.Exceptions.SemanticError: ('Invalid implicit cast from String and Integer to Number.', '1-1-1-2')
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### Run VTL Scripts
|
|
144
|
+
|
|
145
|
+
The `run` method serves to execute a VTL script with input datapoints.
|
|
146
|
+
|
|
147
|
+
Returns a dictionary with all the generated Datasets.
|
|
148
|
+
When the output parameter is set, the engine will write the result of the computation to the output
|
|
149
|
+
folder, else it will include the data in the dictionary of the computed datasets.
|
|
150
|
+
|
|
151
|
+
Two validations are performed before running, which can raise errors:
|
|
152
|
+
|
|
153
|
+
* Semantic analysis: Equivalent to running the `semantic_analysis` method
|
|
154
|
+
* Data load analysis: Basic check of the data structure (names and types)
|
|
155
|
+
|
|
156
|
+
#### Example 3: Simple run
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
from vtlengine import run
|
|
160
|
+
import pandas as pd
|
|
161
|
+
|
|
162
|
+
script = """
|
|
163
|
+
DS_A := DS_1 * 10;
|
|
164
|
+
"""
|
|
165
|
+
|
|
166
|
+
data_structures = {
|
|
167
|
+
'datasets': [
|
|
168
|
+
{'name': 'DS_1',
|
|
169
|
+
'DataStructure': [
|
|
170
|
+
{'name': 'Id_1',
|
|
171
|
+
'type':
|
|
172
|
+
'Integer',
|
|
173
|
+
'role': 'Identifier',
|
|
174
|
+
'nullable': False},
|
|
175
|
+
{'name': 'Me_1',
|
|
176
|
+
'type': 'Number',
|
|
177
|
+
'role': 'Measure',
|
|
178
|
+
'nullable': True}
|
|
179
|
+
]
|
|
180
|
+
}
|
|
181
|
+
]
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
data_df = pd.DataFrame(
|
|
185
|
+
{"Id_1": [1, 2, 3],
|
|
186
|
+
"Me_1": [10, 20, 30]})
|
|
187
|
+
|
|
188
|
+
datapoints = {"DS_1": data_df}
|
|
189
|
+
|
|
190
|
+
run_result = run(script=script, data_structures=data_structures,
|
|
191
|
+
datapoints=datapoints)
|
|
192
|
+
|
|
193
|
+
print(run_result)
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
Returns:
|
|
197
|
+
|
|
198
|
+
``` python
|
|
199
|
+
{'DS_A': Dataset(name='DS_A', components={'Id_1': Component(name='Id_1', data_type=<class 'vtlengine.DataTypes.Integer'>, role=<Role.IDENTIFIER: 'Identifier'>, nullable=False), 'Me_1': Component(name='Me_1', data_type=<class 'vtlengine.DataTypes.Number'>, role=<Role.MEASURE: 'Measure'>, nullable=True)}, data= Id_1 Me_1
|
|
200
|
+
0 1 100.0
|
|
201
|
+
1 2 200.0
|
|
202
|
+
2 3 300.0)}
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
For more information on usage, please refer to
|
|
206
|
+
the [API documentation](https://docs.vtlengine.meaningfuldata.eu/api.html).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "vtlengine"
|
|
3
|
-
version = "1.0"
|
|
3
|
+
version = "1.0.1"
|
|
4
4
|
description = "Run and Validate VTL Scripts"
|
|
5
5
|
authors = ["MeaningfulData <info@meaningfuldata.eu>"]
|
|
6
6
|
license = "AGPL-3.0"
|
|
@@ -34,7 +34,32 @@ pytest-cov = "^5.0.0"
|
|
|
34
34
|
line-profiler-pycharm = "^1.2.0"
|
|
35
35
|
sphinx = "^7.4"
|
|
36
36
|
sphinx-rtd-theme = "^2.0.0"
|
|
37
|
+
flake8 = "^7.1.1"
|
|
38
|
+
black = "^24.8.0"
|
|
39
|
+
mypy = "^1.11.2"
|
|
40
|
+
pandas-stubs = "^2.2.3.241009"
|
|
41
|
+
stubs = "^1.0.0"
|
|
37
42
|
|
|
43
|
+
[tool.black]
|
|
44
|
+
line_length = 100
|
|
45
|
+
|
|
46
|
+
[tool.mypy]
|
|
47
|
+
files = "src"
|
|
48
|
+
exclude = "src/vtlengine/AST/.*"
|
|
49
|
+
disallow_untyped_defs = true
|
|
50
|
+
disallow_untyped_calls = true
|
|
51
|
+
ignore_errors = false
|
|
52
|
+
no_implicit_optional = true
|
|
53
|
+
show_column_numbers = true
|
|
54
|
+
strict_equality = true
|
|
55
|
+
strict_optional = true
|
|
56
|
+
strict = true
|
|
57
|
+
enable_error_code = [
|
|
58
|
+
"ignore-without-code",
|
|
59
|
+
"redundant-expr",
|
|
60
|
+
"truthy-bool",
|
|
61
|
+
]
|
|
62
|
+
warn_return_any = false
|
|
38
63
|
|
|
39
64
|
[build-system]
|
|
40
65
|
requires = ["poetry-core"]
|