vtlengine 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +153 -100
- vtlengine/API/__init__.py +109 -67
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +8 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/lexer.py +732 -142
- vtlengine/AST/Grammar/parser.py +2188 -826
- vtlengine/AST/Grammar/tokens.py +128 -128
- vtlengine/AST/VtlVisitor.py +7 -4
- vtlengine/AST/__init__.py +22 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +194 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +52 -27
- vtlengine/Exceptions/messages.py +134 -62
- vtlengine/Interpreter/__init__.py +781 -487
- vtlengine/Model/__init__.py +165 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +115 -59
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +149 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +89 -44
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +334 -216
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +195 -40
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +77 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +46 -37
- vtlengine-1.0.1.dist-info/METADATA +236 -0
- vtlengine-1.0.1.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
vtlengine/Operators/Time.py
CHANGED
@@ -1,30 +1,35 @@
 import re
+import pandas as pd
+
 from datetime import date
-from
-from typing import Optional, Union, List
+from typing import Optional, Union, List, Any, Dict, Type
 
 import vtlengine.Operators as Operators
-import
-from vtlengine.DataTypes import Date, TimePeriod, TimeInterval, Duration
+from vtlengine.DataTypes import Date, TimePeriod, TimeInterval, Duration, ScalarType
 from vtlengine.DataTypes.TimeHandling import DURATION_MAPPING, date_to_period, TimePeriodHandler
-from dateutil.relativedelta import relativedelta
 
-from vtlengine.AST.Grammar.tokens import
-
+from vtlengine.AST.Grammar.tokens import (
+    TIME_AGG,
+    TIMESHIFT,
+    PERIOD_INDICATOR,
+    FILL_TIME_SERIES,
+    FLOW_TO_STOCK,
+)
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Dataset, DataComponent, Scalar, Component, Role
 
 
 class Time(Operators.Operator):
-    periods
-    time_id
-    other_ids
+    periods: Any
+    time_id: Any
+    other_ids: Any
+    measures: Any
 
     TIME_DATA_TYPES = [Date, TimePeriod, TimeInterval]
 
-    FREQUENCY_MAP = {
-    YEAR_TO_PERIOD = {
-    PERIOD_ORDER = {
+    FREQUENCY_MAP = {"Y": "years", "M": "months", "D": "days"}
+    YEAR_TO_PERIOD = {"S": 2, "Q": 4, "M": 12, "W": 52, "D": 365}
+    PERIOD_ORDER = {"A": 0, "S": 1, "Q": 2, "M": 3, "W": 4, "D": 5}
 
     op = FLOW_TO_STOCK
 
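The class now carries its three frequency lookup tables as plain literals. A minimal sketch of what they encode (values copied from the hunk above; reading the keys as the usual VTL duration codes, A = annual down to D = daily, is our assumption):

PERIOD_ORDER = {"A": 0, "S": 1, "Q": 2, "M": 3, "W": 4, "D": 5}  # coarse-to-fine ranking
YEAR_TO_PERIOD = {"S": 2, "Q": 4, "M": 12, "W": 52, "D": 365}    # periods per year

print(PERIOD_ORDER["Q"] < PERIOD_ORDER["M"])  # True: quarters rank coarser than months
print(YEAR_TO_PERIOD["Q"])                    # 4, later used to wrap shifted quarters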
@@ -42,7 +47,7 @@ class Time(Operators.Operator):
     def sort_by_time(cls, operand: Dataset) -> Optional[pd.DataFrame]:
         time_id = cls._get_time_id(operand)
         if time_id is None:
-            return
+            return None
         ids = [id.name for id in operand.get_identifiers() if id.name != time_id]
         ids.append(time_id)
         if operand.data is None:
@@ -55,47 +60,55 @@
         return tp_value.period_indicator
 
     @classmethod
-    def parse_date(cls, date_str):
+    def parse_date(cls, date_str: str) -> date:
         return date.fromisoformat(date_str)
 
     @classmethod
-    def get_frequencies(cls, dates):
-        dates =
-        dates =
-
+    def get_frequencies(cls, dates: Any) -> Any:
+        dates = pd.to_datetime(dates)
+        dates = dates.sort_values()
+        deltas = dates.diff().dropna()
+        return deltas
 
     @classmethod
-    def find_min_frequency(cls, differences):
-
-
-
+    def find_min_frequency(cls, differences: Any) -> str:
+        months_deltas = differences.apply(lambda x: x.days // 30)
+        days_deltas = differences.apply(lambda x: x.days)
+        min_months = min(
+            (diff for diff in months_deltas if diff > 0 and diff % 12 != 0), default=None
+        )
+        min_days = min(
+            (diff for diff in days_deltas if diff > 0 and diff % 365 != 0 and diff % 366 != 0),
+            default=None,
+        )
+        return "D" if min_days else "M" if min_months else "Y"
 
     @classmethod
-    def get_frequency_from_time(cls, interval):
-        start_date, end_date = interval.split(
+    def get_frequency_from_time(cls, interval: str) -> Any:
+        start_date, end_date = interval.split("/")
         return date.fromisoformat(end_date) - date.fromisoformat(start_date)
 
     @classmethod
-    def get_date_format(cls, date_str):
+    def get_date_format(cls, date_str: Union[str, date]) -> str:
         date = cls.parse_date(date_str) if isinstance(date_str, str) else date_str
-        return
+        return "%Y-%m-%d" if date.day >= 1 else "%Y-%m" if date.month >= 1 else "%Y"
 
 
 class Unary(Time):
 
     @classmethod
-    def validate(cls, operand:
+    def validate(cls, operand: Any) -> Any:
         if not isinstance(operand, Dataset):
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
         if cls._get_time_id(operand) is None:
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
         operand.data = cls.sort_by_time(operand)
-        return Dataset(name=
+        return Dataset(name="result", components=operand.components.copy(), data=None)
 
     @classmethod
-    def evaluate(cls, operand:
+    def evaluate(cls, operand: Any) -> Any:
         result = cls.validate(operand)
-        result.data = operand.data.copy()
+        result.data = operand.data.copy() if operand.data is not None else pd.DataFrame()
         if len(operand.data) < 2:
             return result
 
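A sketch of the heuristic that the rewritten get_frequencies/find_min_frequency pair implements: gaps between sorted dates are bucketed as daily, monthly, or yearly by testing whether they are clean multiples of a year. The helper name min_frequency below is ours, not the module's:

import pandas as pd

def min_frequency(deltas):
    # Same filters as find_min_frequency above
    months = deltas.apply(lambda x: x.days // 30)
    days = deltas.apply(lambda x: x.days)
    min_months = min((d for d in months if d > 0 and d % 12 != 0), default=None)
    min_days = min((d for d in days if d > 0 and d % 365 != 0 and d % 366 != 0), default=None)
    return "D" if min_days else "M" if min_months else "Y"

yearly = pd.to_datetime(pd.Series(["2019-01-01", "2020-01-01", "2021-01-01"]))
print(min_frequency(yearly.diff().dropna()))  # "Y": every gap is an exact year length
daily = pd.to_datetime(pd.Series(["2020-01-01", "2020-01-02", "2020-01-03"]))
print(min_frequency(daily.diff().dropna()))   # "D": one-day gaps survive both filters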
@@ -109,8 +122,11 @@ class Unary(Time):
         if data_type == TimePeriod:
             result.data = cls._period_accumulation(result.data, measure_names)
         elif data_type == Date or data_type == TimeInterval:
-            result.data[measure_names] =
-
+            result.data[measure_names] = (
+                result.data.groupby(cls.other_ids)[measure_names]
+                .apply(cls.py_op)
+                .reset_index(drop=True)
+            )
         else:
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="dataset", param="date type")
         return result
@@ -118,10 +134,14 @@ class Unary(Time):
     @classmethod
     def _period_accumulation(cls, data: pd.DataFrame, measure_names: List[str]) -> pd.DataFrame:
         data = data.copy()
-        data[
-
+        data["Period_group_col"] = (
+            data[cls.time_id].apply(cls._get_period).apply(lambda x: cls.PERIOD_ORDER[x])
+        )
+        result = data.groupby(cls.other_ids + ["Period_group_col"], group_keys=False)[
+            measure_names
+        ].apply(cls.py_op)
         data[measure_names] = result.reset_index(drop=True)
-        return data.drop(columns=
+        return data.drop(columns="Period_group_col")
 
 
 class Binary(Time):
@@ -132,18 +152,20 @@ class Period_indicator(Unary):
     op = PERIOD_INDICATOR
 
     @classmethod
-    def validate(cls, operand:
-    ) -> Dataset | DataComponent | Scalar:
+    def validate(cls, operand: Any) -> Any:
         if isinstance(operand, Dataset):
             time_id = cls._get_time_id(operand)
             if time_id is None or operand.components[time_id].data_type != TimePeriod:
                 raise SemanticError("1-1-19-8", op=cls.op, comp_type="time period dataset")
-            result_components = {
-
-
-
-
-
+            result_components = {
+                comp.name: comp
+                for comp in operand.components.values()
+                if comp.role == Role.IDENTIFIER
+            }
+            result_components["duration_var"] = Component(
+                name="duration_var", data_type=Duration, role=Role.MEASURE, nullable=True
+            )
+            return Dataset(name="result", components=result_components, data=None)
         # DataComponent and Scalar validation
         if operand.data_type != TimePeriod:
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time period component")
@@ -152,8 +174,9 @@ class Period_indicator(Unary):
         return Scalar(name=operand.name, data_type=Duration, value=None)
 
     @classmethod
-    def evaluate(
-
+    def evaluate(
+        cls, operand: Union[Dataset, DataComponent, Scalar, str]
+    ) -> Union[Dataset, DataComponent, Scalar, str]:
         result = cls.validate(operand)
         if isinstance(operand, str):
             return cls._get_period(str(operand))
@@ -161,23 +184,32 @@
             result.value = cls._get_period(str(operand.value))
             return result
         if isinstance(operand, DataComponent):
-
+            if operand.data is not None:
+                result.data = operand.data.map(cls._get_period, na_action="ignore")
             return result
         cls.time_id = cls._get_time_id(operand)
-
-
-
-
-
+        result.data = (
+            operand.data.copy()[result.get_identifiers_names()]
+            if (operand.data is not None)
+            else pd.Series()
+        )
+        period_series: Any = result.data[cls.time_id].map(cls._get_period)  # type: ignore[index]
+        result.data["duration_var"] = period_series
         return result
 
 
 class Flow_to_stock(Unary):
-
+
+    @classmethod
+    def py_op(cls, x: Any) -> Any:
+        return x.cumsum().fillna(x)
 
 
 class Stock_to_flow(Unary):
-
+
+    @classmethod
+    def py_op(cls, x: Any) -> Any:
+        return x.diff().fillna(x)
 
 
 class Fill_time_series(Binary):
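The two py_op implementations added above are inverses: flow-to-stock accumulates a series, stock-to-flow differences it back, and fillna restores the first row after diff (and passes missing values through after cumsum). Standalone:

import pandas as pd

flows = pd.Series([10.0, 5.0, 7.0])
stocks = flows.cumsum().fillna(flows)       # Flow_to_stock.py_op -> 10, 15, 22
recovered = stocks.diff().fillna(stocks)    # Stock_to_flow.py_op -> 10, 5, 7
print(stocks.tolist(), recovered.tolist())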
@@ -186,6 +218,8 @@ class Fill_time_series(Binary):
     @classmethod
     def evaluate(cls, operand: Dataset, fill_type: str) -> Dataset:
         result = cls.validate(operand, fill_type)
+        if operand.data is None:
+            operand.data = pd.DataFrame()
         result.data = operand.data.copy()
         result.data[cls.time_id] = result.data[cls.time_id].astype(str)
         if len(result.data) < 2:
@@ -195,16 +229,21 @@
             result.data = cls.fill_periods(result.data, fill_type)
         elif data_type == Date:
             frequencies = cls.get_frequencies(operand.data[cls.time_id].apply(cls.parse_date))
-            result.data = cls.fill_dates(
-
+            result.data = cls.fill_dates(
+                result.data, fill_type, cls.find_min_frequency(frequencies)
+            )
         elif data_type == TimeInterval:
             frequencies = result.data[cls.time_id].apply(cls.get_frequency_from_time).unique()
             if len(frequencies) > 1:
-                raise SemanticError(
-
+                raise SemanticError(
+                    "1-1-19-9",
+                    op=cls.op,
+                    comp_type="dataset",
+                    param="single time interval frequency",
+                )
             result.data = cls.fill_time_intervals(result.data, fill_type, frequencies[0])
         else:
-            raise SemanticError("1-1-19-
+            raise SemanticError("1-1-19-2", op=cls.op)
         return result
 
     @classmethod
@@ -216,50 +255,48 @@ class Fill_time_series(Binary):
         cls.measures = operand.get_measures_names()
         if cls.time_id is None:
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
-        if fill_type not in [
-            fill_type =
-        return Dataset(name=
+        if fill_type not in ["all", "single"]:
+            fill_type = "all"
+        return Dataset(name="result", components=operand.components.copy(), data=None)
 
     @classmethod
-    def max_min_from_period(cls, data, mode=
+    def max_min_from_period(cls, data: pd.DataFrame, mode: str = "all") -> Dict[str, Any]:
 
+        result_dict: Dict[Any, Any] = {}
         data = data.assign(
             Periods_col=data[cls.time_id].apply(cls._get_period),
             Periods_values_col=data[cls.time_id].apply(
-                lambda x: int(re.sub(r
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        elif mode == 'single':
-            result_dict = {}
-            for name, group in data.groupby(cls.other_ids + ['Periods_col']):
+                lambda x: int(re.sub(r"[^\d]", "", x.split("-")[-1]))
+            ),
+            Year_values_col=data[cls.time_id].apply(lambda x: int(x.split("-")[0])),
+        ).sort_values(by=["Year_values_col", "Periods_col", "Periods_values_col"])
+
+        if mode == "all":
+            min_year = data["Year_values_col"].min()
+            max_year = data["Year_values_col"].max()
+            result_dict = {"min": {"A": min_year}, "max": {"A": max_year}}
+            for period, group in data.groupby("Periods_col"):
+                if period != "A":
+                    result_dict["min"][period] = group["Periods_values_col"].min()
+                    result_dict["max"][period] = group["Periods_values_col"].max()
+
+        elif mode == "single":
+            for name, group in data.groupby(cls.other_ids + ["Periods_col"]):
                 key = name[:-1] if len(name[:-1]) > 1 else name[0]
                 period = name[-1]
                 if key not in result_dict:
                     result_dict[key] = {
-
-
+                        "min": {"A": group["Year_values_col"].min()},
+                        "max": {"A": group["Year_values_col"].max()},
                     }
-                if period !=
-                    year_min = group[
-                    year_max = group[
+                if period != "A":
+                    year_min = group["Year_values_col"].min()
+                    year_max = group["Year_values_col"].max()
 
-                    result_dict[key][
-                    result_dict[key][
-                    result_dict[key][
-                    result_dict[key][
+                    result_dict[key]["min"]["A"] = min(result_dict[key]["min"]["A"], year_min)
+                    result_dict[key]["max"]["A"] = max(result_dict[key]["max"]["A"], year_max)
+                    result_dict[key]["min"][period] = group["Periods_values_col"].min()
+                    result_dict[key]["max"][period] = group["Periods_values_col"].max()
 
         else:
             raise ValueError("Mode must be either 'all' or 'single'")
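The lambdas in the rewritten max_min_from_period split a time_period string such as "2020-Q3" into its year and period number; the nested dict then records, per period indicator, the min and max of those numbers. A standalone sketch of the parsing step:

import re

value = "2020-Q3"                                         # year-indicator-number
year = int(value.split("-")[0])                           # 2020
number = int(re.sub(r"[^\d]", "", value.split("-")[-1]))  # 3
print({"min": {"A": year, "Q": number}})                  # shape of one result_dict entry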
@@ -267,189 +304,226 @@ class Fill_time_series(Binary):
 
     @classmethod
     def fill_periods(cls, data: pd.DataFrame, fill_type: str) -> pd.DataFrame:
-        result_data = cls.period_filler(data, single=(fill_type !=
+        result_data = cls.period_filler(data, single=(fill_type != "all"))
         not_na = result_data[cls.measures].notna().any(axis=1)
         duplicated = result_data.duplicated(subset=(cls.other_ids + [cls.time_id]), keep=False)
         return result_data[~duplicated | not_na]
 
     @classmethod
-    def period_filler(cls, data: pd.DataFrame, single=False) -> pd.DataFrame:
+    def period_filler(cls, data: pd.DataFrame, single: bool = False) -> pd.DataFrame:
         filled_data = []
-        MAX_MIN = cls.max_min_from_period(data, mode=
-        cls.periods =
-            MAX_MIN[
+        MAX_MIN = cls.max_min_from_period(data, mode="single" if single else "all")
+        cls.periods = (
+            list(MAX_MIN[list(MAX_MIN.keys())[0]]["min"].keys())
+            if single
+            else list(MAX_MIN["min"].keys())
+        )
         groups = data.groupby(cls.other_ids)
 
         for group, group_df in groups:
-            period_limits =
-
+            period_limits = (
+                MAX_MIN if not single else MAX_MIN[group if len(group) > 1 else group[0]]
+            )
+            years = list(range(period_limits["min"]["A"], period_limits["max"]["A"] + 1))
             for period in cls.periods:
-                if period ==
+                if period == "A":
                     filled_data.extend(cls.fill_periods_rows(group_df, period, years))
                 else:
-                    if period in period_limits[
-                        vals =
+                    if period in period_limits["min"] and period in period_limits["max"]:
+                        vals = list(
+                            range(period_limits["min"][period], period_limits["max"][period] + 1)
+                        )
                         filled_data.extend(
-                            cls.fill_periods_rows(group_df, period, years, vals=vals)
+                            cls.fill_periods_rows(group_df, period, years, vals=vals)
+                        )
 
         filled_data = pd.concat(filled_data, ignore_index=True)
-        combined_data = pd.concat([filled_data, data], ignore_index=True)
-        if len(cls.periods) == 1 and cls.periods[0] ==
+        combined_data = pd.concat([filled_data, data], ignore_index=True)  # type: ignore[list-item]
+        if len(cls.periods) == 1 and cls.periods[0] == "A":
             combined_data[cls.time_id] = combined_data[cls.time_id].astype(int)
         else:
             combined_data[cls.time_id] = combined_data[cls.time_id].astype(str)
         return combined_data.sort_values(by=cls.other_ids + [cls.time_id])
 
     @classmethod
-    def fill_periods_rows(
+    def fill_periods_rows(
+        cls, group_df: Any, period: str, years: List[int], vals: Optional[List[int]] = None
+    ) -> List[Any]:
         rows = []
         for year in years:
-            if period ==
+            if period == "A":
                 rows.append(cls.create_period_row(group_df, period, year))
-
+            elif vals is not None:
                 for val in vals:
                     rows.append(cls.create_period_row(group_df, period, year, val=val))
         return rows
 
     @classmethod
-    def create_period_row(
+    def create_period_row(
+        cls, group_df: Any, period: str, year: int, val: Optional[int] = None
+    ) -> Any:
         row = group_df.iloc[0].copy()
-        row[cls.time_id] = f"{year}" if period ==
+        row[cls.time_id] = f"{year}" if period == "A" else f"{year}-{period}{val:d}"
         row[cls.measures] = None
         return row.to_frame().T
 
     @classmethod
-    def max_min_from_date(cls, data, fill_type=
-        def compute_min_max(group):
+    def max_min_from_date(cls, data: pd.DataFrame, fill_type: str = "all") -> Dict[str, Any]:
+        def compute_min_max(group: Any) -> Dict[str, Any]:
             min_date = cls.parse_date(group.min())
             max_date = cls.parse_date(group.max())
             date_format = cls.get_date_format(max_date)
-            return {
+            return {"min": min_date, "max": max_date, "date_format": date_format}
 
-        if fill_type ==
+        if fill_type == "all":
             return compute_min_max(data[cls.time_id])
 
         grouped = data.groupby(cls.other_ids)
-        result_dict = {
-
-
+        result_dict = {
+            name if len(name) > 1 else name[0]: compute_min_max(group[cls.time_id])
+            for name, group in grouped
+        }
         return result_dict
 
     @classmethod
-    def fill_dates(cls, data: pd.DataFrame, fill_type, min_frequency) -> pd.DataFrame:
+    def fill_dates(cls, data: pd.DataFrame, fill_type: str, min_frequency: str) -> pd.DataFrame:
        result_data = cls.date_filler(data, fill_type, min_frequency)
         not_na = result_data[cls.measures].notna().any(axis=1)
         duplicated = result_data.duplicated(subset=(cls.other_ids + [cls.time_id]), keep=False)
         return result_data[~duplicated | not_na]
 
     @classmethod
-    def date_filler(cls, data: pd.DataFrame, fill_type, min_frequency) -> pd.DataFrame:
+    def date_filler(cls, data: pd.DataFrame, fill_type: str, min_frequency: str) -> pd.DataFrame:
         MAX_MIN = cls.max_min_from_date(data, fill_type)
         date_format = None
         filled_data = []
 
-        def create_filled_dates(
-
+        def create_filled_dates(
+            group: Any, min_max: Dict[str, Any]
+        ) -> (pd.DataFrame, str):  # type: ignore[syntax]
+            date_range = pd.date_range(start=min_max["min"], end=min_max["max"], freq=min_frequency)
             date_df = pd.DataFrame(date_range, columns=[cls.time_id])
             date_df[cls.other_ids] = group.iloc[0][cls.other_ids]
             date_df[cls.measures] = None
-            return date_df, min_max[
+            return date_df, min_max["date_format"]
 
         for name, group in data.groupby(cls.other_ids):
-            min_max = MAX_MIN if fill_type ==
+            min_max = MAX_MIN if fill_type == "all" else MAX_MIN[name if len(name) > 1 else name[0]]
             filled_dates, date_format = create_filled_dates(group, min_max)
             filled_data.append(filled_dates)
 
         filled_data = pd.concat(filled_data, ignore_index=True)
         filled_data[cls.time_id] = filled_data[cls.time_id].dt.strftime(date_format)
-        combined_data = pd.concat([filled_data, data], ignore_index=True)
+        combined_data = pd.concat([filled_data, data], ignore_index=True)  # type: ignore[list-item]
         combined_data[cls.time_id] = combined_data[cls.time_id].astype(str)
         return combined_data.sort_values(by=cls.other_ids + [cls.time_id])
 
     @classmethod
-    def max_min_from_time(cls, data, fill_type=
-        data = data.applymap(str).sort_values(
+    def max_min_from_time(cls, data: pd.DataFrame, fill_type: str = "all") -> Dict[str, Any]:
+        data = data.applymap(str).sort_values(  # type: ignore[operator]
+            by=cls.other_ids + [cls.time_id]
+        )
 
-        def extract_max_min(group):
-            start_dates = group.apply(lambda x: x.split(
-            end_dates = group.apply(lambda x: x.split(
-            return {
-
+        def extract_max_min(group: Any) -> Dict[str, Any]:
+            start_dates = group.apply(lambda x: x.split("/")[0])
+            end_dates = group.apply(lambda x: x.split("/")[1])
+            return {
+                "start": {"min": start_dates.min(), "max": start_dates.max()},
+                "end": {"min": end_dates.min(), "max": end_dates.max()},
+            }
 
-        if fill_type ==
+        if fill_type == "all":
             return extract_max_min(data[cls.time_id])
         else:
-            return {
-
+            return {
+                name: extract_max_min(group[cls.time_id])
+                for name, group in data.groupby(cls.other_ids)
+            }
 
     @classmethod
-    def fill_time_intervals(
-
+    def fill_time_intervals(
+        cls, data: pd.DataFrame, fill_type: str, frequency: str
+    ) -> pd.DataFrame:
         result_data = cls.time_filler(data, fill_type, frequency)
         not_na = result_data[cls.measures].notna().any(axis=1)
         duplicated = result_data.duplicated(subset=(cls.other_ids + [cls.time_id]), keep=False)
         return result_data[~duplicated | not_na]
 
     @classmethod
-    def time_filler(cls, data: pd.DataFrame, fill_type,
-                    frequency) -> pd.DataFrame:
+    def time_filler(cls, data: pd.DataFrame, fill_type: str, frequency: str) -> pd.DataFrame:
         MAX_MIN = cls.max_min_from_time(data, fill_type)
 
-        def fill_group(group_df):
+        def fill_group(group_df: pd.DataFrame) -> pd.DataFrame:
             group_key = group_df.iloc[0][cls.other_ids].values
-            if fill_type !=
+            if fill_type != "all":
                 group_key = group_key[0] if len(group_key) == 1 else tuple(group_key)
-            group_dict = MAX_MIN if fill_type ==
+            group_dict = MAX_MIN if fill_type == "all" else MAX_MIN[group_key]
 
-            intervals = [
-
+            intervals = [
+                f"{group_dict['start']['min']}/{group_dict['end']['min']}",
+                f"{group_dict['start']['max']}/{group_dict['end']['max']}",
+            ]
             for interval in intervals:
                 if interval not in group_df[cls.time_id].values:
                     empty_row = group_df.iloc[0].copy()
                     empty_row[cls.time_id] = interval
                     empty_row[cls.measures] = None
-                    group_df = group_df.append(
+                    group_df = group_df.append(  # type: ignore[operator]
+                        empty_row, ignore_index=True
+                    )
             start_group_df = group_df.copy()
             start_group_df[cls.time_id] = start_group_df[cls.time_id].apply(
-                lambda x: x.split(
+                lambda x: x.split("/")[0]
+            )
             end_group_df = group_df.copy()
-            end_group_df[cls.time_id] = end_group_df[cls.time_id].apply(lambda x: x.split(
+            end_group_df[cls.time_id] = end_group_df[cls.time_id].apply(lambda x: x.split("/")[1])
             start_filled = cls.date_filler(start_group_df, fill_type, frequency)
             end_filled = cls.date_filler(end_group_df, fill_type, frequency)
-            start_filled[cls.time_id] = start_filled[cls.time_id].str.cat(
-
+            start_filled[cls.time_id] = start_filled[cls.time_id].str.cat(
+                end_filled[cls.time_id], sep="/"
+            )
             return start_filled
 
         filled_data = [fill_group(group_df) for _, group_df in data.groupby(cls.other_ids)]
-        return
-
+        return (
+            pd.concat(filled_data, ignore_index=True)
+            .sort_values(by=cls.other_ids + [cls.time_id])
+            .drop_duplicates()
+        )
 
 
 class Time_Shift(Binary):
     op = TIMESHIFT
 
     @classmethod
-    def evaluate(cls, operand: Dataset, shift_value:
+    def evaluate(cls, operand: Dataset, shift_value: Any) -> Dataset:
         result = cls.validate(operand, shift_value)
-        result.data = operand.data.copy()
+        result.data = operand.data.copy() if operand.data is not None else pd.DataFrame()
         shift_value = int(shift_value.value)
         cls.time_id = cls._get_time_id(result)
-
+
+        data_type: Any = (
+            result.components[cls.time_id].data_type if isinstance(cls.time_id, str) else None
+        )
 
         if data_type == Date:
             freq = cls.find_min_frequency(
-                cls.get_frequencies(
-
-
+                cls.get_frequencies(
+                    result.data[cls.time_id].map(cls.parse_date, na_action="ignore")
+                )
+            )
+            result.data[cls.time_id] = cls.shift_dates(result.data[cls.time_id], shift_value, freq)
         elif data_type == Time:
             freq = cls.get_frequency_from_time(result.data[cls.time_id].iloc[0])
             result.data[cls.time_id] = result.data[cls.time_id].apply(
-                lambda x: cls.shift_interval(x, shift_value, freq)
+                lambda x: cls.shift_interval(x, shift_value, freq)
+            )
         elif data_type == TimePeriod:
             periods = result.data[cls.time_id].apply(cls._get_period).unique()
             result.data[cls.time_id] = result.data[cls.time_id].apply(
-                lambda x: cls.shift_period(x, shift_value)
-
+                lambda x: cls.shift_period(x, shift_value)
+            )
+            if len(periods) == 1 and periods[0] == "A":
                 result.data[cls.time_id] = result.data[cls.time_id].astype(int)
         else:
             raise SemanticError("1-1-19-2", op=cls.op)
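date_filler above builds candidate rows from a pd.date_range spanning each group's observed min and max, concatenates them with the real rows, and later drops the duplicates that carry no data. A minimal sketch of the range step (the freq string is whatever find_min_frequency returned):

import pandas as pd

min_max = {"min": "2020-01-01", "max": "2020-04-01"}  # as built by compute_min_max
rng = pd.date_range(start=min_max["min"], end=min_max["max"], freq="D")
print(len(rng))  # 92: one candidate row per calendar day in the span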
@@ -457,27 +531,41 @@ class Time_Shift(Binary):
 
     @classmethod
     def validate(cls, operand: Dataset, shift_value: str) -> Dataset:
-        if
+        if cls._get_time_id(operand) is None:
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
-
-        return Dataset(name='result', components=operand.components.copy(), data=None)
+        return Dataset(name="result", components=operand.components.copy(), data=None)
 
     @classmethod
-    def
-
+    def shift_dates(cls, dates: Any, shift_value: int, frequency: str) -> Any:
+        dates = pd.to_datetime(dates)
+        if frequency == "D":
+            return dates + pd.to_timedelta(shift_value, unit="D")
+        elif frequency == "W":
+            return dates + pd.to_timedelta(shift_value, unit="W")
+        elif frequency == "Y":
+            return dates + pd.DateOffset(years=shift_value)
+        elif frequency in ["M", "Q", "S"]:
+            return dates + pd.DateOffset(months=shift_value)
+        raise SemanticError("2-1-19-2", period=frequency)
 
     @classmethod
-    def shift_period(
+    def shift_period(
+        cls, period_str: str, shift_value: int, frequency: Optional[int] = None
+    ) -> str:
         period_type = cls._get_period(period_str)
 
-        if period_type ==
+        if period_type == "A":
             return str(int(period_str) + shift_value)
 
         if frequency:
             shift_value *= frequency
 
         tp_value = TimePeriodHandler(period_str)
-        year, period, value =
+        year, period, value = (
+            tp_value.year,
+            tp_value.period_indicator,
+            tp_value.period_number + shift_value,
+        )
         period_limit = cls.YEAR_TO_PERIOD[period]
 
         if value <= 0:
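The new shift_dates dispatches on the detected frequency: day and week shifts go through pd.to_timedelta, year shifts through the calendar-aware pd.DateOffset, and the "M"/"Q"/"S" branch adds shift_value months whichever of the three indicators matched. The two offset styles, side by side:

import pandas as pd

dates = pd.to_datetime(pd.Series(["2020-01-31"]))
print((dates + pd.DateOffset(years=2)).tolist())        # "Y" branch: 2022-01-31
print((dates + pd.to_timedelta(3, unit="D")).tolist())  # "D" branch: 2020-02-03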
@@ -490,23 +578,23 @@ class Time_Shift(Binary):
         return f"{year}-{period}{value}"
 
     @classmethod
-    def shift_interval(cls, interval, shift_value, frequency):
-        start_date, end_date = interval.split(
-        start_date = cls.
-        end_date = cls.
-        return f
+    def shift_interval(cls, interval: str, shift_value: Any, frequency: str) -> str:
+        start_date, end_date = interval.split("/")
+        start_date = cls.shift_dates(start_date, shift_value, frequency)
+        end_date = cls.shift_dates(end_date, shift_value, frequency)
+        return f"{start_date}/{end_date}"
 
 
 class Time_Aggregation(Time):
     op = TIME_AGG
 
     @classmethod
-    def _check_duration(cls, value: str):
+    def _check_duration(cls, value: str) -> None:
         if value not in DURATION_MAPPING:
             raise SemanticError("1-1-19-3", op=cls.op, param="duration")
 
     @classmethod
-    def _check_params(cls, period_from: Optional[str], period_to: str):
+    def _check_params(cls, period_from: Optional[str], period_to: str) -> None:
         cls._check_duration(period_to)
         if period_from is not None:
             cls._check_duration(period_from)
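shift_period handles annual periods by plain integer addition and otherwise adds the shift to the period number; the overflow handling (the value <= 0 path and the period_limit check) falls between the visible hunks, so the wrap-around below is a hypothetical reconstruction from YEAR_TO_PERIOD, not the module's exact code:

# Hypothetical wrap-around consistent with YEAR_TO_PERIOD; the real branch is not shown above.
YEAR_TO_PERIOD = {"S": 2, "Q": 4, "M": 12, "W": 52, "D": 365}
year, period, value = 2020, "Q", 4 + 1        # "2020-Q4" shifted by +1
limit = YEAR_TO_PERIOD[period]
year, value = year + (value - 1) // limit, (value - 1) % limit + 1
print(f"{year}-{period}{value}")              # 2021-Q1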
@@ -515,8 +603,9 @@ class Time_Aggregation(Time):
             raise SemanticError("1-1-19-4", op=cls.op, value_1=period_from, value_2=period_to)
 
     @classmethod
-    def dataset_validation(
-
+    def dataset_validation(
+        cls, operand: Dataset, period_from: Optional[str], period_to: str, conf: str
+    ) -> Dataset:
         # TODO: Review with VTL TF as this makes no sense
 
         count_time_types = 0
@@ -526,29 +615,34 @@
             if measure.data_type == TimePeriod and period_to == "D":
                 raise SemanticError("1-1-19-5", op=cls.op)
             if measure.data_type == TimeInterval:
-                raise SemanticError("1-1-19-6", op=cls.op,
-                                    comp=measure.name)
+                raise SemanticError("1-1-19-6", op=cls.op, comp=measure.name)
 
         count_time_types = 0
         for id_ in operand.get_identifiers():
             if id_.data_type in cls.TIME_DATA_TYPES:
                 count_time_types += 1
         if count_time_types != 1:
-            raise SemanticError(
-
+            raise SemanticError(
+                "1-1-19-9", op=cls.op, comp_type="dataset", param="single time identifier"
+            )
 
         if count_time_types != 1:
-            raise SemanticError(
-
+            raise SemanticError(
+                "1-1-19-9", op=cls.op, comp_type="dataset", param="single time measure"
+            )
 
-        result_components = {
-
+        result_components = {
+            comp.name: comp
+            for comp in operand.components.values()
+            if comp.role in [Role.IDENTIFIER, Role.MEASURE]
+        }
 
         return Dataset(name=operand.name, components=result_components, data=None)
 
     @classmethod
-    def component_validation(
-
+    def component_validation(
+        cls, operand: DataComponent, period_from: Optional[str], period_to: str, conf: str
+    ) -> DataComponent:
         if operand.data_type not in cls.TIME_DATA_TYPES:
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time component")
         if operand.data_type == TimePeriod and period_to == "D":
@@ -559,16 +653,23 @@
         return DataComponent(name=operand.name, data_type=operand.data_type, data=None)
 
     @classmethod
-    def scalar_validation(
-
+    def scalar_validation(
+        cls, operand: Scalar, period_from: Optional[str], period_to: str, conf: str
+    ) -> Scalar:
         if operand.data_type not in cls.TIME_DATA_TYPES:
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time scalar")
 
         return Scalar(name=operand.name, data_type=operand.data_type, value=None)
 
     @classmethod
-    def _execute_time_aggregation(
-
+    def _execute_time_aggregation(
+        cls,
+        value: str,
+        data_type: Type[ScalarType],
+        period_from: Optional[str],
+        period_to: str,
+        conf: str,
+    ) -> str:
         if data_type == TimePeriod:  # Time period
             return _time_period_access(value, period_to)
 
@@ -585,40 +686,53 @@
         raise NotImplementedError
 
     @classmethod
-    def dataset_evaluation(
-
+    def dataset_evaluation(
+        cls, operand: Dataset, period_from: Optional[str], period_to: str, conf: str
+    ) -> Dataset:
         result = cls.dataset_validation(operand, period_from, period_to, conf)
-        result.data = operand.data.copy()
+        result.data = operand.data.copy() if operand.data is not None else pd.DataFrame()
         time_measure = [m for m in operand.get_measures() if m.data_type in cls.TIME_DATA_TYPES][0]
         result.data[time_measure.name] = result.data[time_measure.name].map(
-            lambda x: cls._execute_time_aggregation(
-
-
+            lambda x: cls._execute_time_aggregation(
+                x, time_measure.data_type, period_from, period_to, conf
+            ),
+            na_action="ignore",
+        )
 
         return result
 
     @classmethod
-    def component_evaluation(
-
-
+    def component_evaluation(
+        cls, operand: DataComponent, period_from: Optional[str], period_to: str, conf: str
+    ) -> DataComponent:
         result = cls.component_validation(operand, period_from, period_to, conf)
-
-
-
-
+        if operand.data is not None:
+            result.data = operand.data.map(
+                lambda x: cls._execute_time_aggregation(
+                    x, operand.data_type, period_from, period_to, conf
+                ),
+                na_action="ignore",
+            )
         return result
 
     @classmethod
-    def scalar_evaluation(
-
+    def scalar_evaluation(
+        cls, operand: Scalar, period_from: Optional[str], period_to: str, conf: str
+    ) -> Scalar:
         result = cls.scalar_validation(operand, period_from, period_to, conf)
-        result.value = cls._execute_time_aggregation(
-
+        result.value = cls._execute_time_aggregation(
+            operand.value, operand.data_type, period_from, period_to, conf
+        )
         return result
 
     @classmethod
-    def validate(
-
+    def validate(
+        cls,
+        operand: Union[Dataset, DataComponent, Scalar],
+        period_from: Optional[str],
+        period_to: str,
+        conf: str,
+    ) -> Union[Dataset, DataComponent, Scalar]:
         cls._check_params(period_from, period_to)
         if isinstance(operand, Dataset):
             return cls.dataset_validation(operand, period_from, period_to, conf)
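All three evaluation paths now route missing values around _execute_time_aggregation with na_action="ignore", so nulls pass through the aggregation untouched instead of reaching the period parser. The pandas behaviour in isolation:

import pandas as pd

s = pd.Series(["2020-01-15", None])
print(s.map(lambda v: v[:7], na_action="ignore").tolist())  # ['2020-01', None]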
@@ -628,9 +742,13 @@ class Time_Aggregation(Time):
             return cls.scalar_validation(operand, period_from, period_to, conf)
 
     @classmethod
-    def evaluate(
-
-
+    def evaluate(
+        cls,
+        operand: Union[Dataset, DataComponent, Scalar],
+        period_from: Optional[str],
+        period_to: str,
+        conf: str,
+    ) -> Union[Dataset, DataComponent, Scalar]:
         cls._check_params(period_from, period_to)
         if isinstance(operand, Dataset):
             return cls.dataset_evaluation(operand, period_from, period_to, conf)
@@ -640,7 +758,7 @@ class Time_Aggregation(Time):
             return cls.scalar_evaluation(operand, period_from, period_to, conf)
 
 
-def _time_period_access(v:
+def _time_period_access(v: Any, to_param: str) -> Any:
     v = TimePeriodHandler(v)
     if v.period_indicator == to_param:
         return str(v)
@@ -648,7 +766,7 @@ def _time_period_access(v: str, to_param: str) -> str:
     return str(v)
 
 
-def _date_access(v, to_param, start: bool):
+def _date_access(v: str, to_param: str, start: bool) -> Any:
     period_value = date_to_period(date.fromisoformat(v), to_param)
     if start:
         return period_value.start_date()
@@ -658,11 +776,11 @@
 class Current_Date(Time):
 
     @classmethod
-    def validate(cls):
-        return Scalar(name=
+    def validate(cls) -> Scalar:
+        return Scalar(name="current_date", data_type=Date, value=None)
 
     @classmethod
-    def evaluate(cls):
+    def evaluate(cls) -> Scalar:
         result = cls.validate()
         result.value = date.today().isoformat()
         return result