vtlengine 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +153 -100
- vtlengine/API/__init__.py +109 -67
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +8 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/lexer.py +732 -142
- vtlengine/AST/Grammar/parser.py +2188 -826
- vtlengine/AST/Grammar/tokens.py +128 -128
- vtlengine/AST/VtlVisitor.py +7 -4
- vtlengine/AST/__init__.py +22 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +194 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +52 -27
- vtlengine/Exceptions/messages.py +134 -62
- vtlengine/Interpreter/__init__.py +781 -487
- vtlengine/Model/__init__.py +165 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +115 -59
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +149 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +89 -44
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +334 -216
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +195 -40
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +77 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +46 -37
- vtlengine-1.0.1.dist-info/METADATA +236 -0
- vtlengine-1.0.1.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
vtlengine/Model/__init__.py
CHANGED
|
@@ -2,7 +2,7 @@ import json
|
|
|
2
2
|
from collections import Counter
|
|
3
3
|
from dataclasses import dataclass
|
|
4
4
|
from enum import Enum
|
|
5
|
-
from typing import Dict, List, Optional, Union
|
|
5
|
+
from typing import Dict, List, Optional, Union, Any, Type
|
|
6
6
|
|
|
7
7
|
import vtlengine.DataTypes as DataTypes
|
|
8
8
|
import pandas as pd
|
|
@@ -10,7 +10,7 @@ import sqlglot
|
|
|
10
10
|
import sqlglot.expressions as exp
|
|
11
11
|
from vtlengine.DataTypes import SCALAR_TYPES, ScalarType
|
|
12
12
|
from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
|
|
13
|
-
from pandas import DataFrame as PandasDataFrame
|
|
13
|
+
from pandas import DataFrame as PandasDataFrame
|
|
14
14
|
from pandas._testing import assert_frame_equal
|
|
15
15
|
|
|
16
16
|
|
|
@@ -22,16 +22,17 @@ class Scalar:
|
|
|
22
22
|
"""
|
|
23
23
|
Class representing a scalar value
|
|
24
24
|
"""
|
|
25
|
+
|
|
25
26
|
name: str
|
|
26
|
-
data_type: ScalarType
|
|
27
|
-
value:
|
|
27
|
+
data_type: Type[ScalarType]
|
|
28
|
+
value: Any
|
|
28
29
|
|
|
29
30
|
@classmethod
|
|
30
|
-
def from_json(cls, json_str):
|
|
31
|
+
def from_json(cls, json_str: str) -> "Scalar":
|
|
31
32
|
data = json.loads(json_str)
|
|
32
|
-
return cls(data[
|
|
33
|
+
return cls(data["name"], SCALAR_TYPES[data["data_type"]], data["value"])
|
|
33
34
|
|
|
34
|
-
def __eq__(self, other):
|
|
35
|
+
def __eq__(self, other: Any) -> bool:
|
|
35
36
|
same_name = self.name == other.name
|
|
36
37
|
same_type = self.data_type == other.data_type
|
|
37
38
|
x = None if not pd.isnull(self.value) else self.value
|
|
@@ -44,6 +45,7 @@ class Role(Enum):
|
|
|
44
45
|
"""
|
|
45
46
|
Enum class for the role of a component (Identifier, Attribute, Measure)
|
|
46
47
|
"""
|
|
48
|
+
|
|
47
49
|
IDENTIFIER = "Identifier"
|
|
48
50
|
ATTRIBUTE = "Attribute"
|
|
49
51
|
MEASURE = "Measure"
|
|
@@ -52,32 +54,38 @@ class Role(Enum):
|
|
|
52
54
|
@dataclass
|
|
53
55
|
class DataComponent:
|
|
54
56
|
"""A component of a dataset with data"""
|
|
57
|
+
|
|
55
58
|
name: str
|
|
56
59
|
# data: Optional[Union[PandasSeries, SparkSeries]]
|
|
57
|
-
data: Optional[
|
|
58
|
-
data_type: ScalarType
|
|
60
|
+
data: Optional[Any]
|
|
61
|
+
data_type: Type[ScalarType]
|
|
59
62
|
role: Role = Role.MEASURE
|
|
60
63
|
nullable: bool = True
|
|
61
64
|
|
|
62
|
-
def __eq__(self, other):
|
|
65
|
+
def __eq__(self, other: Any) -> bool:
|
|
63
66
|
if not isinstance(other, DataComponent):
|
|
64
67
|
return False
|
|
65
68
|
return self.to_dict() == other.to_dict()
|
|
66
69
|
|
|
67
70
|
@classmethod
|
|
68
|
-
def from_json(cls, json_str):
|
|
69
|
-
return cls(
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
71
|
+
def from_json(cls, json_str: Any) -> "DataComponent":
|
|
72
|
+
return cls(
|
|
73
|
+
json_str["name"],
|
|
74
|
+
None,
|
|
75
|
+
SCALAR_TYPES[json_str["data_type"]],
|
|
76
|
+
Role(json_str["role"]),
|
|
77
|
+
json_str["nullable"],
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
73
81
|
return {
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
82
|
+
"name": self.name,
|
|
83
|
+
"data": self.data,
|
|
84
|
+
"data_type": self.data_type,
|
|
85
|
+
"role": self.role,
|
|
78
86
|
}
|
|
79
87
|
|
|
80
|
-
def to_json(self):
|
|
88
|
+
def to_json(self) -> str:
|
|
81
89
|
return json.dumps(self.to_dict(), indent=4)
|
|
82
90
|
|
|
83
91
|
|
|
@@ -86,58 +94,69 @@ class Component:
|
|
|
86
94
|
"""
|
|
87
95
|
Class representing a component of a dataset
|
|
88
96
|
"""
|
|
97
|
+
|
|
89
98
|
name: str
|
|
90
|
-
data_type: ScalarType
|
|
99
|
+
data_type: Type[ScalarType]
|
|
91
100
|
role: Role
|
|
92
101
|
nullable: bool
|
|
93
102
|
|
|
94
|
-
def __post_init__(self):
|
|
103
|
+
def __post_init__(self) -> None:
|
|
95
104
|
if self.role == Role.IDENTIFIER and self.nullable:
|
|
96
105
|
raise ValueError(f"Identifier {self.name} cannot be nullable")
|
|
97
106
|
|
|
98
|
-
def __eq__(self, other):
|
|
107
|
+
def __eq__(self, other: Any) -> bool:
|
|
99
108
|
return self.to_dict() == other.to_dict()
|
|
100
109
|
|
|
101
|
-
def copy(self):
|
|
110
|
+
def copy(self) -> "Component":
|
|
102
111
|
return Component(self.name, self.data_type, self.role, self.nullable)
|
|
103
112
|
|
|
104
113
|
@classmethod
|
|
105
|
-
def from_json(cls, json_str):
|
|
106
|
-
return cls(
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
114
|
+
def from_json(cls, json_str: Any) -> "Component":
|
|
115
|
+
return cls(
|
|
116
|
+
json_str["name"],
|
|
117
|
+
SCALAR_TYPES[json_str["data_type"]],
|
|
118
|
+
Role(json_str["role"]),
|
|
119
|
+
json_str["nullable"],
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
110
123
|
return {
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
124
|
+
"name": self.name,
|
|
125
|
+
"data_type": DataTypes.SCALAR_TYPES_CLASS_REVERSE[self.data_type],
|
|
126
|
+
"role": self.role.value,
|
|
127
|
+
"nullable": self.nullable,
|
|
115
128
|
}
|
|
116
129
|
|
|
117
|
-
def to_json(self):
|
|
118
|
-
return json.dumps(self.to_dict()
|
|
130
|
+
def to_json(self) -> str:
|
|
131
|
+
return json.dumps(self.to_dict())
|
|
119
132
|
|
|
120
|
-
def rename(self, new_name: str):
|
|
133
|
+
def rename(self, new_name: str) -> None:
|
|
121
134
|
self.name = new_name
|
|
122
135
|
|
|
136
|
+
def __str__(self) -> str:
|
|
137
|
+
return self.to_json()
|
|
138
|
+
|
|
139
|
+
__repr__ = __str__
|
|
140
|
+
|
|
123
141
|
|
|
124
142
|
@dataclass
|
|
125
143
|
class Dataset:
|
|
126
144
|
name: str
|
|
127
145
|
components: Dict[str, Component]
|
|
128
146
|
# data: Optional[Union[SparkDataFrame, PandasDataFrame]]
|
|
129
|
-
data: Optional[PandasDataFrame]
|
|
147
|
+
data: Optional[PandasDataFrame] = None
|
|
130
148
|
|
|
131
|
-
def __post_init__(self):
|
|
149
|
+
def __post_init__(self) -> None:
|
|
132
150
|
if self.data is not None:
|
|
133
151
|
if len(self.components) != len(self.data.columns):
|
|
134
152
|
raise ValueError(
|
|
135
|
-
"The number of components must match the number of columns in the data"
|
|
153
|
+
"The number of components must match the number of columns in the data"
|
|
154
|
+
)
|
|
136
155
|
for name, component in self.components.items():
|
|
137
156
|
if name not in self.data.columns:
|
|
138
157
|
raise ValueError(f"Component {name} not found in the data")
|
|
139
158
|
|
|
140
|
-
def __eq__(self, other):
|
|
159
|
+
def __eq__(self, other: Any) -> bool:
|
|
141
160
|
if not isinstance(other, Dataset):
|
|
142
161
|
return False
|
|
143
162
|
|
|
@@ -149,22 +168,30 @@ class Dataset:
|
|
|
149
168
|
same_components = self.components == other.components
|
|
150
169
|
if not same_components:
|
|
151
170
|
print("\nComponents mismatch")
|
|
152
|
-
result_comps = self.to_dict()[
|
|
153
|
-
reference_comps = other.to_dict()[
|
|
171
|
+
result_comps = self.to_dict()["components"]
|
|
172
|
+
reference_comps = other.to_dict()["components"]
|
|
154
173
|
if len(result_comps) != len(reference_comps):
|
|
155
174
|
print(
|
|
156
|
-
f"Shape mismatch: result:{len(result_comps)}
|
|
175
|
+
f"Shape mismatch: result:{len(result_comps)} "
|
|
176
|
+
f"!= reference:{len(reference_comps)}"
|
|
177
|
+
)
|
|
157
178
|
if len(result_comps) < len(reference_comps):
|
|
158
|
-
print(
|
|
159
|
-
|
|
179
|
+
print(
|
|
180
|
+
"Missing components in result:",
|
|
181
|
+
set(reference_comps.keys()) - set(result_comps.keys()),
|
|
182
|
+
)
|
|
160
183
|
else:
|
|
161
|
-
print(
|
|
162
|
-
|
|
184
|
+
print(
|
|
185
|
+
"Additional components in result:",
|
|
186
|
+
set(result_comps.keys()) - set(reference_comps.keys()),
|
|
187
|
+
)
|
|
163
188
|
return False
|
|
164
189
|
|
|
165
|
-
diff_comps = {
|
|
166
|
-
|
|
167
|
-
|
|
190
|
+
diff_comps = {
|
|
191
|
+
k: v
|
|
192
|
+
for k, v in result_comps.items()
|
|
193
|
+
if (k in reference_comps and v != reference_comps[k]) or k not in reference_comps
|
|
194
|
+
}
|
|
168
195
|
ref_diff_comps = {k: v for k, v in reference_comps.items() if k in diff_comps}
|
|
169
196
|
print(f"Differences in components {self.name}: ")
|
|
170
197
|
print("result:", json.dumps(diff_comps, indent=4))
|
|
@@ -173,45 +200,59 @@ class Dataset:
|
|
|
173
200
|
|
|
174
201
|
if self.data is None and other.data is None:
|
|
175
202
|
return True
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
# if isinstance(other.data, SparkDataFrame):
|
|
179
|
-
# other.data = other.data.to_pandas()
|
|
203
|
+
elif self.data is None or other.data is None:
|
|
204
|
+
return False
|
|
180
205
|
if len(self.data) == len(other.data) == 0:
|
|
181
206
|
assert self.data.shape == other.data.shape
|
|
182
207
|
|
|
183
208
|
self.data.fillna("", inplace=True)
|
|
184
209
|
other.data.fillna("", inplace=True)
|
|
185
|
-
# self.data = self.data.sort_values(by=self.get_identifiers_names()).reset_index(drop=True)
|
|
186
|
-
# other.data = other.data.sort_values(by=other.get_identifiers_names().sort()).reset_index(drop=True)
|
|
187
210
|
sorted_identifiers = sorted(self.get_identifiers_names())
|
|
188
211
|
self.data = self.data.sort_values(by=sorted_identifiers).reset_index(drop=True)
|
|
189
212
|
other.data = other.data.sort_values(by=sorted_identifiers).reset_index(drop=True)
|
|
190
213
|
self.data = self.data.reindex(sorted(self.data.columns), axis=1)
|
|
191
214
|
other.data = other.data.reindex(sorted(other.data.columns), axis=1)
|
|
192
215
|
for comp in self.components.values():
|
|
193
|
-
|
|
216
|
+
type_name: str = comp.data_type.__name__.__str__()
|
|
217
|
+
if type_name in ["String", "Date"]:
|
|
194
218
|
self.data[comp.name] = self.data[comp.name].astype(str)
|
|
195
219
|
other.data[comp.name] = other.data[comp.name].astype(str)
|
|
196
|
-
elif
|
|
220
|
+
elif type_name == "TimePeriod":
|
|
197
221
|
self.data[comp.name] = self.data[comp.name].astype(str)
|
|
198
222
|
other.data[comp.name] = other.data[comp.name].astype(str)
|
|
199
223
|
self.data[comp.name] = self.data[comp.name].map(
|
|
200
|
-
lambda x: str(TimePeriodHandler(x)) if x != "" else "", na_action=
|
|
224
|
+
lambda x: str(TimePeriodHandler(x)) if x != "" else "", na_action="ignore"
|
|
225
|
+
)
|
|
201
226
|
other.data[comp.name] = other.data[comp.name].map(
|
|
202
|
-
lambda x: str(TimePeriodHandler(x)) if x != "" else "", na_action=
|
|
203
|
-
|
|
204
|
-
|
|
227
|
+
lambda x: str(TimePeriodHandler(x)) if x != "" else "", na_action="ignore"
|
|
228
|
+
)
|
|
229
|
+
elif type_name in ["Integer", "Number"]:
|
|
230
|
+
if type_name == "Integer":
|
|
205
231
|
type_ = "int64"
|
|
206
232
|
else:
|
|
207
233
|
type_ = "float32"
|
|
208
234
|
# We use here a number to avoid errors on equality on empty strings
|
|
209
|
-
self.data[comp.name] =
|
|
210
|
-
|
|
235
|
+
self.data[comp.name] = (
|
|
236
|
+
self.data[comp.name]
|
|
237
|
+
.replace("", -1234997)
|
|
238
|
+
.astype(type_) # type: ignore[call-overload]
|
|
239
|
+
)
|
|
240
|
+
other.data[comp.name] = (
|
|
241
|
+
other.data[comp.name]
|
|
242
|
+
.replace("", -1234997)
|
|
243
|
+
.astype(type_) # type: ignore[call-overload]
|
|
244
|
+
)
|
|
211
245
|
try:
|
|
212
|
-
assert_frame_equal(
|
|
213
|
-
|
|
214
|
-
|
|
246
|
+
assert_frame_equal(
|
|
247
|
+
self.data,
|
|
248
|
+
other.data,
|
|
249
|
+
check_dtype=False,
|
|
250
|
+
check_index_type=False,
|
|
251
|
+
check_datetimelike_compat=True,
|
|
252
|
+
check_exact=False,
|
|
253
|
+
rtol=0.01,
|
|
254
|
+
atol=0.01,
|
|
255
|
+
)
|
|
215
256
|
except AssertionError as e:
|
|
216
257
|
if "DataFrame shape" in str(e):
|
|
217
258
|
print(f"\nDataFrame shape mismatch {self.name}:")
|
|
@@ -223,7 +264,7 @@ class Dataset:
|
|
|
223
264
|
return True
|
|
224
265
|
# To display actual null values instead of -1234997
|
|
225
266
|
for comp in self.components.values():
|
|
226
|
-
if comp.data_type.__name__ in [
|
|
267
|
+
if comp.data_type.__name__.__str__() in ["Integer", "Number"]:
|
|
227
268
|
diff[comp.name] = diff[comp.name].replace(-1234997, "")
|
|
228
269
|
print("\n Differences between the dataframes in", self.name)
|
|
229
270
|
print(diff)
|
|
@@ -233,12 +274,12 @@ class Dataset:
|
|
|
233
274
|
def get_component(self, component_name: str) -> Component:
|
|
234
275
|
return self.components[component_name]
|
|
235
276
|
|
|
236
|
-
def add_component(self, component: Component):
|
|
277
|
+
def add_component(self, component: Component) -> None:
|
|
237
278
|
if component.name in self.components:
|
|
238
279
|
raise ValueError(f"Component with name {component.name} already exists")
|
|
239
280
|
self.components[component.name] = component
|
|
240
281
|
|
|
241
|
-
def delete_component(self, component_name: str):
|
|
282
|
+
def delete_component(self, component_name: str) -> None:
|
|
242
283
|
self.components.pop(component_name, None)
|
|
243
284
|
if self.data is not None:
|
|
244
285
|
self.data.drop(columns=[component_name], inplace=True)
|
|
@@ -247,63 +288,67 @@ class Dataset:
|
|
|
247
288
|
return list(self.components.values())
|
|
248
289
|
|
|
249
290
|
def get_identifiers(self) -> List[Component]:
|
|
250
|
-
return [
|
|
251
|
-
|
|
291
|
+
return [
|
|
292
|
+
component for component in self.components.values() if component.role == Role.IDENTIFIER
|
|
293
|
+
]
|
|
252
294
|
|
|
253
295
|
def get_attributes(self) -> List[Component]:
|
|
254
|
-
return [
|
|
255
|
-
|
|
296
|
+
return [
|
|
297
|
+
component for component in self.components.values() if component.role == Role.ATTRIBUTE
|
|
298
|
+
]
|
|
256
299
|
|
|
257
300
|
def get_measures(self) -> List[Component]:
|
|
258
|
-
return [
|
|
259
|
-
|
|
301
|
+
return [
|
|
302
|
+
component for component in self.components.values() if component.role == Role.MEASURE
|
|
303
|
+
]
|
|
260
304
|
|
|
261
305
|
def get_identifiers_names(self) -> List[str]:
|
|
262
|
-
return [
|
|
263
|
-
|
|
306
|
+
return [
|
|
307
|
+
name for name, component in self.components.items() if component.role == Role.IDENTIFIER
|
|
308
|
+
]
|
|
264
309
|
|
|
265
310
|
def get_attributes_names(self) -> List[str]:
|
|
266
|
-
return [
|
|
267
|
-
|
|
311
|
+
return [
|
|
312
|
+
name for name, component in self.components.items() if component.role == Role.ATTRIBUTE
|
|
313
|
+
]
|
|
268
314
|
|
|
269
315
|
def get_measures_names(self) -> List[str]:
|
|
270
|
-
return [
|
|
271
|
-
|
|
316
|
+
return [
|
|
317
|
+
name for name, component in self.components.items() if component.role == Role.MEASURE
|
|
318
|
+
]
|
|
272
319
|
|
|
273
320
|
def get_components_names(self) -> List[str]:
|
|
274
321
|
return list(self.components.keys())
|
|
275
322
|
|
|
276
323
|
@classmethod
|
|
277
|
-
def from_json(cls, json_str):
|
|
278
|
-
components = {k: Component.from_json(v) for k, v in json_str[
|
|
279
|
-
return cls(json_str[
|
|
324
|
+
def from_json(cls, json_str: Any) -> "Dataset":
|
|
325
|
+
components = {k: Component.from_json(v) for k, v in json_str["components"].items()}
|
|
326
|
+
return cls(json_str["name"], components, pd.DataFrame(json_str["data"]))
|
|
280
327
|
|
|
281
|
-
def to_dict(self):
|
|
328
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
282
329
|
return {
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
330
|
+
"name": self.name,
|
|
331
|
+
"components": {k: v.to_dict() for k, v in self.components.items()},
|
|
332
|
+
"data": self.data.to_dict(orient="records") if self.data is not None else None,
|
|
286
333
|
}
|
|
287
334
|
|
|
288
|
-
def to_json(self):
|
|
335
|
+
def to_json(self) -> str:
|
|
289
336
|
return json.dumps(self.to_dict(), indent=4)
|
|
290
337
|
|
|
291
|
-
def to_json_datastructure(self):
|
|
292
|
-
dict_dataset = self.to_dict()[
|
|
293
|
-
order_keys = [
|
|
338
|
+
def to_json_datastructure(self) -> str:
|
|
339
|
+
dict_dataset = self.to_dict()["components"]
|
|
340
|
+
order_keys = ["name", "role", "type", "nullable"]
|
|
294
341
|
# Rename data_type to type
|
|
295
342
|
for k in dict_dataset:
|
|
296
|
-
dict_dataset[k] = {
|
|
297
|
-
|
|
343
|
+
dict_dataset[k] = {
|
|
344
|
+
ik if ik != "data_type" else "type": v for ik, v in dict_dataset[k].items()
|
|
345
|
+
}
|
|
298
346
|
|
|
299
347
|
# Order keys
|
|
300
348
|
for k in dict_dataset:
|
|
301
349
|
dict_dataset[k] = {ik: dict_dataset[k][ik] for ik in order_keys}
|
|
302
350
|
comp_values = list(dict_dataset.values())
|
|
303
|
-
ds_info = {
|
|
304
|
-
'name': self.name,
|
|
305
|
-
'DataStructure': comp_values
|
|
306
|
-
}
|
|
351
|
+
ds_info = {"name": self.name, "DataStructure": comp_values}
|
|
307
352
|
result = {"datasets": [ds_info]}
|
|
308
353
|
return json.dumps(result, indent=2)
|
|
309
354
|
|
|
@@ -313,10 +358,11 @@ class ScalarSet:
|
|
|
313
358
|
"""
|
|
314
359
|
Class representing a set of scalar values
|
|
315
360
|
"""
|
|
316
|
-
|
|
361
|
+
|
|
362
|
+
data_type: Type[ScalarType]
|
|
317
363
|
values: List[Union[int, float, str, bool]]
|
|
318
364
|
|
|
319
|
-
def __contains__(self, item):
|
|
365
|
+
def __contains__(self, item: str) -> Optional[bool]:
|
|
320
366
|
if isinstance(item, float) and item.is_integer():
|
|
321
367
|
item = int(item)
|
|
322
368
|
if self.data_type == DataTypes.Null:
|
|
@@ -330,21 +376,23 @@ class ValueDomain:
|
|
|
330
376
|
"""
|
|
331
377
|
Class representing a value domain
|
|
332
378
|
"""
|
|
379
|
+
|
|
333
380
|
name: str
|
|
334
|
-
type: ScalarType
|
|
381
|
+
type: Type[ScalarType]
|
|
335
382
|
setlist: List[Union[int, float, str, bool]]
|
|
336
383
|
|
|
337
|
-
def __post_init__(self):
|
|
384
|
+
def __post_init__(self) -> None:
|
|
338
385
|
if len(set(self.setlist)) != len(self.setlist):
|
|
339
386
|
duplicated = [item for item, count in Counter(self.setlist).items() if count > 1]
|
|
340
387
|
raise ValueError(
|
|
341
|
-
f"The setlist must have unique values. Duplicated values: {duplicated}"
|
|
388
|
+
f"The setlist must have unique values. Duplicated values: {duplicated}"
|
|
389
|
+
)
|
|
342
390
|
|
|
343
391
|
# Cast values to the correct type
|
|
344
392
|
self.setlist = [self.type.cast(value) for value in self.setlist]
|
|
345
393
|
|
|
346
394
|
@classmethod
|
|
347
|
-
def from_json(cls, json_str: str):
|
|
395
|
+
def from_json(cls, json_str: str) -> str:
|
|
348
396
|
if len(json_str) == 0:
|
|
349
397
|
raise ValueError("Empty JSON string for ValueDomain")
|
|
350
398
|
|
|
@@ -352,27 +400,22 @@ class ValueDomain:
|
|
|
352
400
|
return cls.from_dict(json_info)
|
|
353
401
|
|
|
354
402
|
@classmethod
|
|
355
|
-
def from_dict(cls, value:
|
|
356
|
-
for x in (
|
|
403
|
+
def from_dict(cls, value: Dict[str, Any]) -> Any:
|
|
404
|
+
for x in ("name", "type", "setlist"):
|
|
357
405
|
if x not in value:
|
|
358
|
-
raise Exception(
|
|
359
|
-
if value[
|
|
360
|
-
raise ValueError(
|
|
361
|
-
f"Invalid data type {value['type']} for ValueDomain {value['name']}")
|
|
406
|
+
raise Exception("Invalid format for ValueDomain. Requires name, type and setlist.")
|
|
407
|
+
if value["type"] not in SCALAR_TYPES:
|
|
408
|
+
raise ValueError(f"Invalid data type {value['type']} for ValueDomain {value['name']}")
|
|
362
409
|
|
|
363
|
-
return cls(value[
|
|
410
|
+
return cls(value["name"], SCALAR_TYPES[value["type"]], value["setlist"])
|
|
364
411
|
|
|
365
|
-
def to_dict(self):
|
|
366
|
-
return {
|
|
367
|
-
'name': self.name,
|
|
368
|
-
'type': self.type.__name__,
|
|
369
|
-
'setlist': self.setlist
|
|
370
|
-
}
|
|
412
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
413
|
+
return {"name": self.name, "type": self.type.__name__, "setlist": self.setlist}
|
|
371
414
|
|
|
372
415
|
def to_json(self) -> str:
|
|
373
416
|
return json.dumps(self.to_dict(), indent=4)
|
|
374
417
|
|
|
375
|
-
def __eq__(self, other):
|
|
418
|
+
def __eq__(self, other: Any) -> bool:
|
|
376
419
|
return self.to_dict() == other.to_dict()
|
|
377
420
|
|
|
378
421
|
|
|
@@ -381,17 +424,18 @@ class ExternalRoutine:
|
|
|
381
424
|
"""
|
|
382
425
|
Class representing an external routine, used in Eval operator
|
|
383
426
|
"""
|
|
427
|
+
|
|
384
428
|
dataset_names: List[str]
|
|
385
429
|
query: str
|
|
386
430
|
name: str
|
|
387
431
|
|
|
388
432
|
@classmethod
|
|
389
|
-
def from_sql_query(cls, name: str, query: str):
|
|
433
|
+
def from_sql_query(cls, name: str, query: str) -> "ExternalRoutine":
|
|
390
434
|
dataset_names = cls._extract_dataset_names(query)
|
|
391
435
|
return cls(dataset_names, query, name)
|
|
392
436
|
|
|
393
437
|
@classmethod
|
|
394
|
-
def _extract_dataset_names(cls, query) -> List[str]:
|
|
438
|
+
def _extract_dataset_names(cls, query: str) -> List[str]:
|
|
395
439
|
expression = sqlglot.parse_one(query, read="sqlite")
|
|
396
440
|
tables_info = list(expression.find_all(exp.Table))
|
|
397
441
|
dataset_names = [t.name for t in tables_info]
|