vtlengine 1.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +159 -102
- vtlengine/API/__init__.py +110 -68
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +402 -205
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +248 -104
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +24 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/Vtl.g4 +49 -20
- vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
- vtlengine/AST/Grammar/lexer.py +2012 -1312
- vtlengine/AST/Grammar/parser.py +7524 -4343
- vtlengine/AST/Grammar/tokens.py +140 -128
- vtlengine/AST/VtlVisitor.py +16 -5
- vtlengine/AST/__init__.py +41 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +196 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +96 -27
- vtlengine/Exceptions/messages.py +149 -69
- vtlengine/Interpreter/__init__.py +817 -497
- vtlengine/Model/__init__.py +172 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +167 -79
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +290 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +129 -46
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +467 -215
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +232 -41
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +79 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +48 -37
- vtlengine-1.0.2.dist-info/METADATA +245 -0
- vtlengine-1.0.2.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/LICENSE.md +0 -0
vtlengine/Operators/Set.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
import
|
|
2
|
-
from typing import List
|
|
1
|
+
from typing import List, Any, Dict
|
|
3
2
|
|
|
4
3
|
from vtlengine.Exceptions import SemanticError
|
|
5
4
|
|
|
6
|
-
if os.environ.get("SPARK"):
|
|
7
|
-
|
|
8
|
-
else:
|
|
9
|
-
|
|
5
|
+
# if os.environ.get("SPARK"):
|
|
6
|
+
# import pyspark.pandas as pd
|
|
7
|
+
# else:
|
|
8
|
+
# import pandas as pd
|
|
9
|
+
import pandas as pd
|
|
10
10
|
|
|
11
11
|
from vtlengine.Model import Dataset
|
|
12
12
|
from vtlengine.Operators import Operator
|
|
@@ -18,18 +18,22 @@ class Set(Operator):
|
|
|
18
18
|
@classmethod
|
|
19
19
|
def check_same_structure(cls, dataset_1: Dataset, dataset_2: Dataset) -> None:
|
|
20
20
|
if len(dataset_1.components) != len(dataset_2.components):
|
|
21
|
-
raise SemanticError(
|
|
22
|
-
|
|
21
|
+
raise SemanticError(
|
|
22
|
+
"1-1-17-1", op=cls.op, dataset_1=dataset_1.name, dataset_2=dataset_2.name
|
|
23
|
+
)
|
|
23
24
|
|
|
24
25
|
for comp in dataset_1.components.values():
|
|
25
26
|
if comp.name not in dataset_2.components:
|
|
26
27
|
raise Exception(f"Component {comp.name} not found in dataset {dataset_2.name}")
|
|
27
28
|
second_comp = dataset_2.components[comp.name]
|
|
28
|
-
binary_implicit_promotion(
|
|
29
|
-
|
|
29
|
+
binary_implicit_promotion(
|
|
30
|
+
comp.data_type, second_comp.data_type, cls.type_to_check, cls.return_type
|
|
31
|
+
)
|
|
30
32
|
if comp.role != second_comp.role:
|
|
31
|
-
raise Exception(
|
|
32
|
-
|
|
33
|
+
raise Exception(
|
|
34
|
+
f"Component {comp.name} has different roles "
|
|
35
|
+
f"in datasets {dataset_1.name} and {dataset_2.name}"
|
|
36
|
+
)
|
|
33
37
|
|
|
34
38
|
@classmethod
|
|
35
39
|
def validate(cls, operands: List[Dataset]) -> Dataset:
|
|
@@ -38,7 +42,7 @@ class Set(Operator):
|
|
|
38
42
|
for operand in operands[1:]:
|
|
39
43
|
cls.check_same_structure(base_operand, operand)
|
|
40
44
|
|
|
41
|
-
result_components = {}
|
|
45
|
+
result_components: Dict[str, Any] = {}
|
|
42
46
|
for operand in operands:
|
|
43
47
|
if len(result_components) == 0:
|
|
44
48
|
result_components = operand.components
|
|
@@ -46,7 +50,8 @@ class Set(Operator):
|
|
|
46
50
|
for comp_name, comp in operand.components.items():
|
|
47
51
|
current_comp = result_components[comp_name]
|
|
48
52
|
result_components[comp_name].data_type = binary_implicit_promotion(
|
|
49
|
-
current_comp.data_type, comp.data_type
|
|
53
|
+
current_comp.data_type, comp.data_type
|
|
54
|
+
)
|
|
50
55
|
result_components[comp_name].nullable = current_comp.nullable or comp.nullable
|
|
51
56
|
|
|
52
57
|
result = Dataset(name="result", components=result_components, data=None)
|
|
@@ -58,10 +63,9 @@ class Union(Set):
|
|
|
58
63
|
def evaluate(cls, operands: List[Dataset]) -> Dataset:
|
|
59
64
|
result = cls.validate(operands)
|
|
60
65
|
all_datapoints = [ds.data for ds in operands]
|
|
61
|
-
result.data = pd.concat(all_datapoints, sort=True,
|
|
62
|
-
ignore_index=True)
|
|
66
|
+
result.data = pd.concat(all_datapoints, sort=True, ignore_index=True)
|
|
63
67
|
identifiers_names = result.get_identifiers_names()
|
|
64
|
-
result.data = result.data.drop_duplicates(subset=identifiers_names, keep=
|
|
68
|
+
result.data = result.data.drop_duplicates(subset=identifiers_names, keep="first")
|
|
65
69
|
result.data.reset_index(drop=True, inplace=True)
|
|
66
70
|
return result
|
|
67
71
|
|
|
@@ -76,16 +80,22 @@ class Intersection(Set):
|
|
|
76
80
|
if result.data is None:
|
|
77
81
|
result.data = data
|
|
78
82
|
else:
|
|
79
|
-
|
|
80
|
-
|
|
83
|
+
if data is None:
|
|
84
|
+
result.data = pd.DataFrame(columns=result.get_identifiers_names())
|
|
85
|
+
break
|
|
86
|
+
result.data = result.data.merge(
|
|
87
|
+
data, how="inner", on=result.get_identifiers_names()
|
|
88
|
+
)
|
|
81
89
|
|
|
82
|
-
not_identifiers = [
|
|
83
|
-
|
|
90
|
+
not_identifiers = [
|
|
91
|
+
col for col in result.get_measures_names() + result.get_attributes_names()
|
|
92
|
+
]
|
|
84
93
|
|
|
85
94
|
for col in not_identifiers:
|
|
86
95
|
result.data[col] = result.data[col + "_x"]
|
|
87
96
|
result.data = result.data[result.get_identifiers_names() + not_identifiers]
|
|
88
|
-
result.data
|
|
97
|
+
if result.data is not None:
|
|
98
|
+
result.data.reset_index(drop=True, inplace=True)
|
|
89
99
|
return result
|
|
90
100
|
|
|
91
101
|
|
|
@@ -96,35 +106,46 @@ class Symdiff(Set):
|
|
|
96
106
|
result = cls.validate(operands)
|
|
97
107
|
all_datapoints = [ds.data for ds in operands]
|
|
98
108
|
for data in all_datapoints:
|
|
109
|
+
if data is None:
|
|
110
|
+
data = pd.DataFrame(columns=result.get_identifiers_names())
|
|
99
111
|
if result.data is None:
|
|
100
112
|
result.data = data
|
|
101
113
|
else:
|
|
102
114
|
# Realiza la operación equivalente en pyspark.pandas
|
|
103
|
-
result.data = result.data.merge(
|
|
104
|
-
|
|
105
|
-
|
|
115
|
+
result.data = result.data.merge(
|
|
116
|
+
data, how="outer", on=result.get_identifiers_names(), suffixes=("_x", "_y")
|
|
117
|
+
)
|
|
106
118
|
|
|
107
119
|
for measure in result.get_measures_names():
|
|
108
|
-
result.data[
|
|
109
|
-
lambda row:
|
|
110
|
-
|
|
111
|
-
|
|
120
|
+
result.data["_merge"] = result.data.apply(
|
|
121
|
+
lambda row: (
|
|
122
|
+
"left_only"
|
|
123
|
+
if pd.isnull(row[measure + "_y"])
|
|
124
|
+
else ("right_only" if pd.isnull(row[measure + "_x"]) else "both")
|
|
125
|
+
),
|
|
126
|
+
axis=1,
|
|
112
127
|
)
|
|
113
128
|
|
|
114
129
|
not_identifiers = result.get_measures_names() + result.get_attributes_names()
|
|
115
130
|
for col in not_identifiers:
|
|
116
131
|
result.data[col] = result.data.apply(
|
|
117
|
-
lambda x, c=col:
|
|
118
|
-
x[c +
|
|
132
|
+
lambda x, c=col: (
|
|
133
|
+
x[c + "_x"]
|
|
134
|
+
if x["_merge"] == "left_only"
|
|
135
|
+
else (x[c + "_y"] if x["_merge"] == "right_only" else None)
|
|
136
|
+
),
|
|
137
|
+
axis=1,
|
|
138
|
+
)
|
|
119
139
|
result.data = result.data[result.get_identifiers_names() + not_identifiers].dropna()
|
|
120
|
-
result.data
|
|
140
|
+
if result.data is not None:
|
|
141
|
+
result.data = result.data.reset_index(drop=True)
|
|
121
142
|
return result
|
|
122
143
|
|
|
123
144
|
|
|
124
145
|
class Setdiff(Set):
|
|
125
146
|
|
|
126
147
|
@staticmethod
|
|
127
|
-
def has_null(row):
|
|
148
|
+
def has_null(row: Any) -> bool:
|
|
128
149
|
return row.isnull().any()
|
|
129
150
|
|
|
130
151
|
@classmethod
|
|
@@ -135,12 +156,15 @@ class Setdiff(Set):
|
|
|
135
156
|
if result.data is None:
|
|
136
157
|
result.data = data
|
|
137
158
|
else:
|
|
159
|
+
if data is None:
|
|
160
|
+
data = pd.DataFrame(columns=result.get_identifiers_names())
|
|
138
161
|
result.data = result.data.merge(data, how="left", on=result.get_identifiers_names())
|
|
139
162
|
if len(result.data) > 0:
|
|
140
163
|
result.data = result.data[result.data.apply(cls.has_null, axis=1)]
|
|
141
164
|
|
|
142
|
-
not_identifiers = [
|
|
143
|
-
|
|
165
|
+
not_identifiers = [
|
|
166
|
+
col for col in result.get_measures_names() + result.get_attributes_names()
|
|
167
|
+
]
|
|
144
168
|
for col in not_identifiers:
|
|
145
169
|
if col + "_x" in result.data:
|
|
146
170
|
result.data[col] = result.data[col + "_x"]
|
|
@@ -148,5 +172,6 @@ class Setdiff(Set):
|
|
|
148
172
|
if col + "_y" in result.data:
|
|
149
173
|
del result.data[col + "_y"]
|
|
150
174
|
result.data = result.data[result.get_identifiers_names() + not_identifiers]
|
|
151
|
-
result.data
|
|
175
|
+
if result.data is not None:
|
|
176
|
+
result.data.reset_index(drop=True, inplace=True)
|
|
152
177
|
return result
|