vtlengine 1.0-py3-none-any.whl → 1.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +159 -102
- vtlengine/API/__init__.py +110 -68
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +402 -205
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +248 -104
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +24 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/Vtl.g4 +49 -20
- vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
- vtlengine/AST/Grammar/lexer.py +2012 -1312
- vtlengine/AST/Grammar/parser.py +7524 -4343
- vtlengine/AST/Grammar/tokens.py +140 -128
- vtlengine/AST/VtlVisitor.py +16 -5
- vtlengine/AST/__init__.py +41 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +196 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +96 -27
- vtlengine/Exceptions/messages.py +149 -69
- vtlengine/Interpreter/__init__.py +817 -497
- vtlengine/Model/__init__.py +172 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +167 -79
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +290 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +129 -46
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +467 -215
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +232 -41
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +79 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +48 -37
- vtlengine-1.0.2.dist-info/METADATA +245 -0
- vtlengine-1.0.2.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/LICENSE.md +0 -0
vtlengine/Model/__init__.py
CHANGED
@@ -2,7 +2,7 @@ import json
 from collections import Counter
 from dataclasses import dataclass
 from enum import Enum
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional, Union, Any, Type

 import vtlengine.DataTypes as DataTypes
 import pandas as pd
@@ -10,7 +10,7 @@ import sqlglot
 import sqlglot.expressions as exp
 from vtlengine.DataTypes import SCALAR_TYPES, ScalarType
 from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
-from pandas import DataFrame as PandasDataFrame
+from pandas import DataFrame as PandasDataFrame
 from pandas._testing import assert_frame_equal

@@ -22,16 +22,17 @@ class Scalar:
     """
     Class representing a scalar value
     """
+
     name: str
-    data_type: ScalarType
-    value:
+    data_type: Type[ScalarType]
+    value: Any

     @classmethod
-    def from_json(cls, json_str):
+    def from_json(cls, json_str: str) -> "Scalar":
         data = json.loads(json_str)
-        return cls(data[
+        return cls(data["name"], SCALAR_TYPES[data["data_type"]], data["value"])

-    def __eq__(self, other):
+    def __eq__(self, other: Any) -> bool:
         same_name = self.name == other.name
         same_type = self.data_type == other.data_type
         x = None if not pd.isnull(self.value) else self.value
@@ -40,10 +41,18 @@ class Scalar:
         return same_name and same_type and same_value


+Role_keys = [
+    "Identifier",
+    "Attribute",
+    "Measure",
+]
+
+
 class Role(Enum):
     """
     Enum class for the role of a component (Identifier, Attribute, Measure)
     """
+
     IDENTIFIER = "Identifier"
     ATTRIBUTE = "Attribute"
     MEASURE = "Measure"
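The retyped `Scalar.from_json` makes the expected payload explicit: a JSON string whose `data_type` field is a key into `SCALAR_TYPES`. A minimal usage sketch, assuming the `Integer` key of `SCALAR_TYPES` seen elsewhere in this diff (the payload values are illustrative):

```python
# Hypothetical round trip against the 1.0.2 API shown above.
from vtlengine.Model import Scalar

payload = '{"name": "sc_1", "data_type": "Integer", "value": 5}'
sc = Scalar.from_json(payload)  # data_type resolves via SCALAR_TYPES["Integer"]
assert sc.name == "sc_1" and sc.value == 5
```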
@@ -52,32 +61,38 @@ class Role(Enum):
 @dataclass
 class DataComponent:
     """A component of a dataset with data"""
+
     name: str
     # data: Optional[Union[PandasSeries, SparkSeries]]
-    data: Optional[
-    data_type: ScalarType
+    data: Optional[Any]
+    data_type: Type[ScalarType]
     role: Role = Role.MEASURE
     nullable: bool = True

-    def __eq__(self, other):
+    def __eq__(self, other: Any) -> bool:
         if not isinstance(other, DataComponent):
             return False
         return self.to_dict() == other.to_dict()

     @classmethod
-    def from_json(cls, json_str):
-        return cls(
-
-
-
+    def from_json(cls, json_str: Any) -> "DataComponent":
+        return cls(
+            json_str["name"],
+            None,
+            SCALAR_TYPES[json_str["data_type"]],
+            Role(json_str["role"]),
+            json_str["nullable"],
+        )
+
+    def to_dict(self) -> Dict[str, Any]:
         return {
-
-
-
-
+            "name": self.name,
+            "data": self.data,
+            "data_type": self.data_type,
+            "role": self.role,
         }

-    def to_json(self):
+    def to_json(self) -> str:
         return json.dumps(self.to_dict(), indent=4)
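Note that despite the `json_str` parameter name, `DataComponent.from_json` indexes into its argument, so it expects an already-parsed mapping rather than a JSON string (the 1.0.2 annotation is accordingly `Any`). A sketch with an illustrative payload:

```python
# Hypothetical usage; the mapping keys mirror DataComponent.to_dict() above.
from vtlengine.Model import DataComponent

dc = DataComponent.from_json(
    {"name": "Me_1", "data_type": "Number", "role": "Measure", "nullable": True}
)
assert dc.data is None  # from_json always leaves data unset
```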
@@ -86,58 +101,69 @@ class Component:
     """
     Class representing a component of a dataset
     """
+
     name: str
-    data_type: ScalarType
+    data_type: Type[ScalarType]
     role: Role
     nullable: bool

-    def __post_init__(self):
+    def __post_init__(self) -> None:
         if self.role == Role.IDENTIFIER and self.nullable:
             raise ValueError(f"Identifier {self.name} cannot be nullable")

-    def __eq__(self, other):
+    def __eq__(self, other: Any) -> bool:
         return self.to_dict() == other.to_dict()

-    def copy(self):
+    def copy(self) -> "Component":
         return Component(self.name, self.data_type, self.role, self.nullable)

     @classmethod
-    def from_json(cls, json_str):
-        return cls(
-
-
-
+    def from_json(cls, json_str: Any) -> "Component":
+        return cls(
+            json_str["name"],
+            SCALAR_TYPES[json_str["data_type"]],
+            Role(json_str["role"]),
+            json_str["nullable"],
+        )
+
+    def to_dict(self) -> Dict[str, Any]:
         return {
-
-
-
-
+            "name": self.name,
+            "data_type": DataTypes.SCALAR_TYPES_CLASS_REVERSE[self.data_type],
+            "role": self.role.value,
+            "nullable": self.nullable,
         }

-    def to_json(self):
-        return json.dumps(self.to_dict()
+    def to_json(self) -> str:
+        return json.dumps(self.to_dict())

-    def rename(self, new_name: str):
+    def rename(self, new_name: str) -> None:
         self.name = new_name

+    def __str__(self) -> str:
+        return self.to_json()
+
+    __repr__ = __str__
+

 @dataclass
 class Dataset:
     name: str
     components: Dict[str, Component]
     # data: Optional[Union[SparkDataFrame, PandasDataFrame]]
-    data: Optional[PandasDataFrame]
+    data: Optional[PandasDataFrame] = None

-    def __post_init__(self):
+    def __post_init__(self) -> None:
         if self.data is not None:
             if len(self.components) != len(self.data.columns):
                 raise ValueError(
-                    "The number of components must match the number of columns in the data"
+                    "The number of components must match the number of columns in the data"
+                )
             for name, component in self.components.items():
                 if name not in self.data.columns:
                     raise ValueError(f"Component {name} not found in the data")

-    def __eq__(self, other):
+    def __eq__(self, other: Any) -> bool:
         if not isinstance(other, Dataset):
             return False
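With `data` now defaulting to `None`, a `Dataset` can be declared structure-first and have data attached later; `__post_init__` only validates column alignment when data is present. A construction sketch, assuming the `Integer` and `Number` keys of `SCALAR_TYPES` (all names are illustrative):

```python
# Hypothetical construction showing the invariants enforced in __post_init__.
import pandas as pd
from vtlengine.DataTypes import SCALAR_TYPES
from vtlengine.Model import Component, Dataset, Role

components = {
    "Id_1": Component("Id_1", SCALAR_TYPES["Integer"], Role.IDENTIFIER, nullable=False),
    "Me_1": Component("Me_1", SCALAR_TYPES["Number"], Role.MEASURE, nullable=True),
}
ds = Dataset("DS_1", components, pd.DataFrame({"Id_1": [1, 2], "Me_1": [10.0, 20.0]}))
ds_empty = Dataset("DS_2", components)  # legal in 1.0.2: data defaults to None
```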
@@ -149,22 +175,30 @@ class Dataset:
         same_components = self.components == other.components
         if not same_components:
             print("\nComponents mismatch")
-            result_comps = self.to_dict()[
-            reference_comps = other.to_dict()[
+            result_comps = self.to_dict()["components"]
+            reference_comps = other.to_dict()["components"]
             if len(result_comps) != len(reference_comps):
                 print(
-                    f"Shape mismatch: result:{len(result_comps)}
+                    f"Shape mismatch: result:{len(result_comps)} "
+                    f"!= reference:{len(reference_comps)}"
+                )
                 if len(result_comps) < len(reference_comps):
-                    print(
-
+                    print(
+                        "Missing components in result:",
+                        set(reference_comps.keys()) - set(result_comps.keys()),
+                    )
                 else:
-                    print(
-
+                    print(
+                        "Additional components in result:",
+                        set(result_comps.keys()) - set(reference_comps.keys()),
+                    )
                 return False

-            diff_comps = {
-
-
+            diff_comps = {
+                k: v
+                for k, v in result_comps.items()
+                if (k in reference_comps and v != reference_comps[k]) or k not in reference_comps
+            }
             ref_diff_comps = {k: v for k, v in reference_comps.items() if k in diff_comps}
             print(f"Differences in components {self.name}: ")
             print("result:", json.dumps(diff_comps, indent=4))
@@ -173,45 +207,59 @@ class Dataset:

         if self.data is None and other.data is None:
             return True
-
-
-        # if isinstance(other.data, SparkDataFrame):
-        #     other.data = other.data.to_pandas()
+        elif self.data is None or other.data is None:
+            return False
         if len(self.data) == len(other.data) == 0:
             assert self.data.shape == other.data.shape

         self.data.fillna("", inplace=True)
         other.data.fillna("", inplace=True)
-        # self.data = self.data.sort_values(by=self.get_identifiers_names()).reset_index(drop=True)
-        # other.data = other.data.sort_values(by=other.get_identifiers_names().sort()).reset_index(drop=True)
         sorted_identifiers = sorted(self.get_identifiers_names())
         self.data = self.data.sort_values(by=sorted_identifiers).reset_index(drop=True)
         other.data = other.data.sort_values(by=sorted_identifiers).reset_index(drop=True)
         self.data = self.data.reindex(sorted(self.data.columns), axis=1)
         other.data = other.data.reindex(sorted(other.data.columns), axis=1)
         for comp in self.components.values():
-
+            type_name: str = comp.data_type.__name__.__str__()
+            if type_name in ["String", "Date"]:
                 self.data[comp.name] = self.data[comp.name].astype(str)
                 other.data[comp.name] = other.data[comp.name].astype(str)
-            elif
+            elif type_name == "TimePeriod":
                 self.data[comp.name] = self.data[comp.name].astype(str)
                 other.data[comp.name] = other.data[comp.name].astype(str)
                 self.data[comp.name] = self.data[comp.name].map(
-                    lambda x: str(TimePeriodHandler(x)) if x != "" else "", na_action=
+                    lambda x: str(TimePeriodHandler(x)) if x != "" else "", na_action="ignore"
+                )
                 other.data[comp.name] = other.data[comp.name].map(
-                    lambda x: str(TimePeriodHandler(x)) if x != "" else "", na_action=
-
-
+                    lambda x: str(TimePeriodHandler(x)) if x != "" else "", na_action="ignore"
+                )
+            elif type_name in ["Integer", "Number"]:
+                if type_name == "Integer":
                     type_ = "int64"
                 else:
                     type_ = "float32"
                 # We use here a number to avoid errors on equality on empty strings
-                self.data[comp.name] =
-
+                self.data[comp.name] = (
+                    self.data[comp.name]
+                    .replace("", -1234997)
+                    .astype(type_)  # type: ignore[call-overload]
+                )
+                other.data[comp.name] = (
+                    other.data[comp.name]
+                    .replace("", -1234997)
+                    .astype(type_)  # type: ignore[call-overload]
+                )
         try:
-            assert_frame_equal(
-
-
+            assert_frame_equal(
+                self.data,
+                other.data,
+                check_dtype=False,
+                check_index_type=False,
+                check_datetimelike_compat=True,
+                check_exact=False,
+                rtol=0.01,
+                atol=0.01,
+            )
         except AssertionError as e:
             if "DataFrame shape" in str(e):
                 print(f"\nDataFrame shape mismatch {self.name}:")
@@ -223,7 +271,7 @@ class Dataset:
             return True
         # To display actual null values instead of -1234997
         for comp in self.components.values():
-            if comp.data_type.__name__ in [
+            if comp.data_type.__name__.__str__() in ["Integer", "Number"]:
                 diff[comp.name] = diff[comp.name].replace(-1234997, "")
         print("\n Differences between the dataframes in", self.name)
         print(diff)
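The `-1234997` sentinel exists so that the empty strings introduced by `fillna("")` do not break the numeric `astype` casts, after which `assert_frame_equal` compares with tolerances rather than exact equality. The trick in isolation, as a standalone pandas sketch (column names are illustrative):

```python
# Standalone illustration of the comparison strategy in Dataset.__eq__:
# blank cells become a sentinel so the cast cannot fail, then frames are
# compared with rtol/atol instead of exact equality.
import pandas as pd
from pandas._testing import assert_frame_equal

left = pd.DataFrame({"Me_1": ["", "1.0", "2.004"]})
right = pd.DataFrame({"Me_1": ["", "1.0", "2.0"]})
left["Me_1"] = left["Me_1"].replace("", -1234997).astype("float32")
right["Me_1"] = right["Me_1"].replace("", -1234997).astype("float32")
assert_frame_equal(left, right, check_exact=False, rtol=0.01, atol=0.01)  # passes
```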
@@ -233,12 +281,12 @@ class Dataset:
     def get_component(self, component_name: str) -> Component:
         return self.components[component_name]

-    def add_component(self, component: Component):
+    def add_component(self, component: Component) -> None:
         if component.name in self.components:
             raise ValueError(f"Component with name {component.name} already exists")
         self.components[component.name] = component

-    def delete_component(self, component_name: str):
+    def delete_component(self, component_name: str) -> None:
         self.components.pop(component_name, None)
         if self.data is not None:
             self.data.drop(columns=[component_name], inplace=True)
@@ -247,63 +295,67 @@ class Dataset:
         return list(self.components.values())

     def get_identifiers(self) -> List[Component]:
-        return [
-
+        return [
+            component for component in self.components.values() if component.role == Role.IDENTIFIER
+        ]

     def get_attributes(self) -> List[Component]:
-        return [
-
+        return [
+            component for component in self.components.values() if component.role == Role.ATTRIBUTE
+        ]

     def get_measures(self) -> List[Component]:
-        return [
-
+        return [
+            component for component in self.components.values() if component.role == Role.MEASURE
+        ]

     def get_identifiers_names(self) -> List[str]:
-        return [
-
+        return [
+            name for name, component in self.components.items() if component.role == Role.IDENTIFIER
+        ]

     def get_attributes_names(self) -> List[str]:
-        return [
-
+        return [
+            name for name, component in self.components.items() if component.role == Role.ATTRIBUTE
+        ]

     def get_measures_names(self) -> List[str]:
-        return [
-
+        return [
+            name for name, component in self.components.items() if component.role == Role.MEASURE
+        ]

     def get_components_names(self) -> List[str]:
         return list(self.components.keys())

     @classmethod
-    def from_json(cls, json_str):
-        components = {k: Component.from_json(v) for k, v in json_str[
-        return cls(json_str[
+    def from_json(cls, json_str: Any) -> "Dataset":
+        components = {k: Component.from_json(v) for k, v in json_str["components"].items()}
+        return cls(json_str["name"], components, pd.DataFrame(json_str["data"]))

-    def to_dict(self):
+    def to_dict(self) -> Dict[str, Any]:
         return {
-
-
-
+            "name": self.name,
+            "components": {k: v.to_dict() for k, v in self.components.items()},
+            "data": self.data.to_dict(orient="records") if self.data is not None else None,
         }

-    def to_json(self):
+    def to_json(self) -> str:
         return json.dumps(self.to_dict(), indent=4)

-    def to_json_datastructure(self):
-        dict_dataset = self.to_dict()[
-        order_keys = [
+    def to_json_datastructure(self) -> str:
+        dict_dataset = self.to_dict()["components"]
+        order_keys = ["name", "role", "type", "nullable"]
         # Rename data_type to type
         for k in dict_dataset:
-            dict_dataset[k] = {
-
+            dict_dataset[k] = {
+                ik if ik != "data_type" else "type": v for ik, v in dict_dataset[k].items()
+            }

         # Order keys
         for k in dict_dataset:
             dict_dataset[k] = {ik: dict_dataset[k][ik] for ik in order_keys}
         comp_values = list(dict_dataset.values())
-        ds_info = {
-            'name': self.name,
-            'DataStructure': comp_values
-        }
+        ds_info = {"name": self.name, "DataStructure": comp_values}
         result = {"datasets": [ds_info]}
         return json.dumps(result, indent=2)
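The role-based accessors are thin filters over `components`, and `to_json_datastructure` re-shapes them into the `{"datasets": [...]}` layout used for data-structure JSON. Continuing the hypothetical `ds` from the construction sketch above:

```python
# Hypothetical continuation of the earlier Dataset sketch.
assert ds.get_identifiers_names() == ["Id_1"]
assert ds.get_measures_names() == ["Me_1"]
print(ds.to_json_datastructure())  # {"datasets": [{"name": "DS_1", "DataStructure": [...]}]}
```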
@@ -313,10 +365,11 @@ class ScalarSet:
     """
     Class representing a set of scalar values
     """
-
+
+    data_type: Type[ScalarType]
     values: List[Union[int, float, str, bool]]

-    def __contains__(self, item):
+    def __contains__(self, item: str) -> Optional[bool]:
         if isinstance(item, float) and item.is_integer():
             item = int(item)
         if self.data_type == DataTypes.Null:
@@ -330,21 +383,23 @@ class ValueDomain:
     """
     Class representing a value domain
     """
+
     name: str
-    type: ScalarType
+    type: Type[ScalarType]
     setlist: List[Union[int, float, str, bool]]

-    def __post_init__(self):
+    def __post_init__(self) -> None:
         if len(set(self.setlist)) != len(self.setlist):
             duplicated = [item for item, count in Counter(self.setlist).items() if count > 1]
             raise ValueError(
-                f"The setlist must have unique values. Duplicated values: {duplicated}"
+                f"The setlist must have unique values. Duplicated values: {duplicated}"
+            )

         # Cast values to the correct type
         self.setlist = [self.type.cast(value) for value in self.setlist]

     @classmethod
-    def from_json(cls, json_str: str):
+    def from_json(cls, json_str: str) -> str:
         if len(json_str) == 0:
             raise ValueError("Empty JSON string for ValueDomain")
@@ -352,27 +407,22 @@ class ValueDomain:
         return cls.from_dict(json_info)

     @classmethod
-    def from_dict(cls, value:
-        for x in (
+    def from_dict(cls, value: Dict[str, Any]) -> Any:
+        for x in ("name", "type", "setlist"):
             if x not in value:
-                raise Exception(
-            if value[
-                raise ValueError(
-                    f"Invalid data type {value['type']} for ValueDomain {value['name']}")
+                raise Exception("Invalid format for ValueDomain. Requires name, type and setlist.")
+        if value["type"] not in SCALAR_TYPES:
+            raise ValueError(f"Invalid data type {value['type']} for ValueDomain {value['name']}")

-        return cls(value[
+        return cls(value["name"], SCALAR_TYPES[value["type"]], value["setlist"])

-    def to_dict(self):
-        return {
-            'name': self.name,
-            'type': self.type.__name__,
-            'setlist': self.setlist
-        }
+    def to_dict(self) -> Dict[str, Any]:
+        return {"name": self.name, "type": self.type.__name__, "setlist": self.setlist}

     def to_json(self) -> str:
         return json.dumps(self.to_dict(), indent=4)

-    def __eq__(self, other):
+    def __eq__(self, other: Any) -> bool:
         return self.to_dict() == other.to_dict()
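`ValueDomain.from_json`, unlike the component-level loaders, really does take a JSON string: it parses it and delegates to `from_dict`, which checks the three required keys before resolving the type. A sketch with an illustrative domain:

```python
# Hypothetical ValueDomain payload; "String" is a SCALAR_TYPES key per this diff.
from vtlengine.Model import ValueDomain

vd = ValueDomain.from_json('{"name": "countries", "type": "String", "setlist": ["ES", "FR"]}')
assert vd.setlist == ["ES", "FR"]
# ValueDomain("dup", vd.type, ["ES", "ES"]) would raise: duplicated setlist values
```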
@@ -381,17 +431,18 @@ class ExternalRoutine:
     """
     Class representing an external routine, used in Eval operator
     """
+
     dataset_names: List[str]
     query: str
     name: str

     @classmethod
-    def from_sql_query(cls, name: str, query: str):
+    def from_sql_query(cls, name: str, query: str) -> "ExternalRoutine":
         dataset_names = cls._extract_dataset_names(query)
         return cls(dataset_names, query, name)

     @classmethod
-    def _extract_dataset_names(cls, query) -> List[str]:
+    def _extract_dataset_names(cls, query: str) -> List[str]:
         expression = sqlglot.parse_one(query, read="sqlite")
         tables_info = list(expression.find_all(exp.Table))
         dataset_names = [t.name for t in tables_info]
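`ExternalRoutine` derives its input dataset names by parsing the SQL text with sqlglot and collecting every table reference, so callers never list the dependencies by hand. A usage sketch with an illustrative query:

```python
# Hypothetical query; dataset_names is whatever tables sqlglot finds in it.
from vtlengine.Model import ExternalRoutine

routine = ExternalRoutine.from_sql_query(
    "filter_routine", "SELECT * FROM DS_1 JOIN DS_2 ON DS_1.Id_1 = DS_2.Id_1"
)
print(routine.dataset_names)  # ['DS_1', 'DS_2']
```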