vtlengine 1.0-py3-none-any.whl → 1.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic.
- vtlengine/API/_InternalApi.py +159 -102
- vtlengine/API/__init__.py +110 -68
- vtlengine/AST/ASTConstructor.py +188 -98
- vtlengine/AST/ASTConstructorModules/Expr.py +402 -205
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +248 -104
- vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
- vtlengine/AST/ASTEncoders.py +1 -1
- vtlengine/AST/ASTTemplate.py +24 -9
- vtlengine/AST/ASTVisitor.py +8 -12
- vtlengine/AST/DAG/__init__.py +43 -35
- vtlengine/AST/DAG/_words.py +4 -4
- vtlengine/AST/Grammar/Vtl.g4 +49 -20
- vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
- vtlengine/AST/Grammar/lexer.py +2012 -1312
- vtlengine/AST/Grammar/parser.py +7524 -4343
- vtlengine/AST/Grammar/tokens.py +140 -128
- vtlengine/AST/VtlVisitor.py +16 -5
- vtlengine/AST/__init__.py +41 -11
- vtlengine/DataTypes/NumericTypesHandling.py +5 -4
- vtlengine/DataTypes/TimeHandling.py +196 -301
- vtlengine/DataTypes/__init__.py +304 -218
- vtlengine/Exceptions/__init__.py +96 -27
- vtlengine/Exceptions/messages.py +149 -69
- vtlengine/Interpreter/__init__.py +817 -497
- vtlengine/Model/__init__.py +172 -121
- vtlengine/Operators/Aggregation.py +156 -95
- vtlengine/Operators/Analytic.py +167 -79
- vtlengine/Operators/Assignment.py +7 -4
- vtlengine/Operators/Boolean.py +27 -32
- vtlengine/Operators/CastOperator.py +177 -131
- vtlengine/Operators/Clause.py +137 -99
- vtlengine/Operators/Comparison.py +148 -117
- vtlengine/Operators/Conditional.py +290 -98
- vtlengine/Operators/General.py +68 -47
- vtlengine/Operators/HROperators.py +91 -72
- vtlengine/Operators/Join.py +217 -118
- vtlengine/Operators/Numeric.py +129 -46
- vtlengine/Operators/RoleSetter.py +16 -15
- vtlengine/Operators/Set.py +61 -36
- vtlengine/Operators/String.py +213 -139
- vtlengine/Operators/Time.py +467 -215
- vtlengine/Operators/Validation.py +117 -76
- vtlengine/Operators/__init__.py +340 -213
- vtlengine/Utils/__init__.py +232 -41
- vtlengine/__init__.py +1 -1
- vtlengine/files/output/__init__.py +15 -6
- vtlengine/files/output/_time_period_representation.py +10 -9
- vtlengine/files/parser/__init__.py +79 -52
- vtlengine/files/parser/_rfc_dialect.py +6 -5
- vtlengine/files/parser/_time_checking.py +48 -37
- vtlengine-1.0.2.dist-info/METADATA +245 -0
- vtlengine-1.0.2.dist-info/RECORD +58 -0
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/WHEEL +1 -1
- vtlengine-1.0.dist-info/METADATA +0 -104
- vtlengine-1.0.dist-info/RECORD +0 -58
- {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/LICENSE.md +0 -0
vtlengine/Operators/Time.py
CHANGED
@@ -1,30 +1,35 @@
 import re
+import pandas as pd
+
 from datetime import date
-from
-from typing import Optional, Union, List
+from typing import Optional, Union, List, Any, Dict, Type

 import vtlengine.Operators as Operators
-import
-from vtlengine.DataTypes import Date, TimePeriod, TimeInterval, Duration
+from vtlengine.DataTypes import Date, TimePeriod, TimeInterval, Duration, ScalarType
 from vtlengine.DataTypes.TimeHandling import DURATION_MAPPING, date_to_period, TimePeriodHandler
-from dateutil.relativedelta import relativedelta

-from vtlengine.AST.Grammar.tokens import
-
+from vtlengine.AST.Grammar.tokens import (
+    TIME_AGG,
+    TIMESHIFT,
+    PERIOD_INDICATOR,
+    FILL_TIME_SERIES,
+    FLOW_TO_STOCK,
+)
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Dataset, DataComponent, Scalar, Component, Role


 class Time(Operators.Operator):
-    periods
-    time_id
-    other_ids
+    periods: Any
+    time_id: Any
+    other_ids: Any
+    measures: Any

     TIME_DATA_TYPES = [Date, TimePeriod, TimeInterval]

-    FREQUENCY_MAP = {
-    YEAR_TO_PERIOD = {
-    PERIOD_ORDER = {
+    FREQUENCY_MAP = {"Y": "years", "M": "months", "D": "days"}
+    YEAR_TO_PERIOD = {"S": 2, "Q": 4, "M": 12, "W": 52, "D": 365}
+    PERIOD_ORDER = {"A": 0, "S": 1, "Q": 2, "M": 3, "W": 4, "D": 5}

     op = FLOW_TO_STOCK

@@ -42,7 +47,7 @@ class Time(Operators.Operator):
     def sort_by_time(cls, operand: Dataset) -> Optional[pd.DataFrame]:
         time_id = cls._get_time_id(operand)
         if time_id is None:
-            return
+            return None
         ids = [id.name for id in operand.get_identifiers() if id.name != time_id]
         ids.append(time_id)
         if operand.data is None:
@@ -55,47 +60,55 @@ class Time(Operators.Operator):
         return tp_value.period_indicator

     @classmethod
-    def parse_date(cls, date_str):
+    def parse_date(cls, date_str: str) -> date:
         return date.fromisoformat(date_str)

     @classmethod
-    def get_frequencies(cls, dates):
-        dates =
-        dates =
-
+    def get_frequencies(cls, dates: Any) -> Any:
+        dates = pd.to_datetime(dates)
+        dates = dates.sort_values()
+        deltas = dates.diff().dropna()
+        return deltas

     @classmethod
-    def find_min_frequency(cls, differences):
-
-
-
+    def find_min_frequency(cls, differences: Any) -> str:
+        months_deltas = differences.apply(lambda x: x.days // 30)
+        days_deltas = differences.apply(lambda x: x.days)
+        min_months = min(
+            (diff for diff in months_deltas if diff > 0 and diff % 12 != 0), default=None
+        )
+        min_days = min(
+            (diff for diff in days_deltas if diff > 0 and diff % 365 != 0 and diff % 366 != 0),
+            default=None,
+        )
+        return "D" if min_days else "M" if min_months else "Y"

     @classmethod
-    def get_frequency_from_time(cls, interval):
-        start_date, end_date = interval.split(
+    def get_frequency_from_time(cls, interval: str) -> Any:
+        start_date, end_date = interval.split("/")
         return date.fromisoformat(end_date) - date.fromisoformat(start_date)

     @classmethod
-    def get_date_format(cls, date_str):
+    def get_date_format(cls, date_str: Union[str, date]) -> str:
         date = cls.parse_date(date_str) if isinstance(date_str, str) else date_str
-        return
+        return "%Y-%m-%d" if date.day >= 1 else "%Y-%m" if date.month >= 1 else "%Y"


 class Unary(Time):

     @classmethod
-    def validate(cls, operand:
+    def validate(cls, operand: Any) -> Any:
         if not isinstance(operand, Dataset):
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
         if cls._get_time_id(operand) is None:
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
         operand.data = cls.sort_by_time(operand)
-        return Dataset(name=
+        return Dataset(name="result", components=operand.components.copy(), data=None)

     @classmethod
-    def evaluate(cls, operand:
+    def evaluate(cls, operand: Any) -> Any:
         result = cls.validate(operand)
-        result.data = operand.data.copy()
+        result.data = operand.data.copy() if operand.data is not None else pd.DataFrame()
         if len(operand.data) < 2:
             return result

@@ -109,8 +122,11 @@ class Unary(Time):
         if data_type == TimePeriod:
             result.data = cls._period_accumulation(result.data, measure_names)
         elif data_type == Date or data_type == TimeInterval:
-            result.data[measure_names] =
-
+            result.data[measure_names] = (
+                result.data.groupby(cls.other_ids)[measure_names]
+                .apply(cls.py_op)
+                .reset_index(drop=True)
+            )
         else:
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="dataset", param="date type")
         return result
@@ -118,32 +134,42 @@ class Unary(Time):
     @classmethod
     def _period_accumulation(cls, data: pd.DataFrame, measure_names: List[str]) -> pd.DataFrame:
         data = data.copy()
-        data[
-
+        data["Period_group_col"] = (
+            data[cls.time_id].apply(cls._get_period).apply(lambda x: cls.PERIOD_ORDER[x])
+        )
+        result = data.groupby(cls.other_ids + ["Period_group_col"], group_keys=False)[
+            measure_names
+        ].apply(cls.py_op)
         data[measure_names] = result.reset_index(drop=True)
-        return data.drop(columns=
+        return data.drop(columns="Period_group_col")


 class Binary(Time):
     pass


+class Parameterized(Time):
+    pass
+
+
 class Period_indicator(Unary):
     op = PERIOD_INDICATOR

     @classmethod
-    def validate(cls, operand:
-    ) -> Dataset | DataComponent | Scalar:
+    def validate(cls, operand: Any) -> Any:
         if isinstance(operand, Dataset):
             time_id = cls._get_time_id(operand)
             if time_id is None or operand.components[time_id].data_type != TimePeriod:
                 raise SemanticError("1-1-19-8", op=cls.op, comp_type="time period dataset")
-            result_components = {
-
-
-
-
-
+            result_components = {
+                comp.name: comp
+                for comp in operand.components.values()
+                if comp.role == Role.IDENTIFIER
+            }
+            result_components["duration_var"] = Component(
+                name="duration_var", data_type=Duration, role=Role.MEASURE, nullable=True
+            )
+            return Dataset(name="result", components=result_components, data=None)
         # DataComponent and Scalar validation
         if operand.data_type != TimePeriod:
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time period component")
@@ -152,8 +178,9 @@ class Period_indicator(Unary):
         return Scalar(name=operand.name, data_type=Duration, value=None)

     @classmethod
-    def evaluate(
-
+    def evaluate(
+        cls, operand: Union[Dataset, DataComponent, Scalar, str]
+    ) -> Union[Dataset, DataComponent, Scalar, str]:
         result = cls.validate(operand)
         if isinstance(operand, str):
             return cls._get_period(str(operand))
@@ -161,23 +188,43 @@ class Period_indicator(Unary):
             result.value = cls._get_period(str(operand.value))
             return result
         if isinstance(operand, DataComponent):
-
+            if operand.data is not None:
+                result.data = operand.data.map(cls._get_period, na_action="ignore")
             return result
         cls.time_id = cls._get_time_id(operand)
+        result.data = (
+            operand.data.copy()[result.get_identifiers_names()]
+            if (operand.data is not None)
+            else pd.Series()
+        )
+        period_series: Any = result.data[cls.time_id].map(cls._get_period)  # type: ignore[index]
+        result.data["duration_var"] = period_series
+        return result

-        result.data = operand.data.copy()[result.get_identifiers_names()]
-        period_series = result.data[cls.time_id].map(cls._get_period)
-        result.data['duration_var'] = period_series

-
+class Parametrized(Time):
+
+    @classmethod
+    def validate(cls, operand: Any, param: Any) -> Any:
+        pass
+
+    @classmethod
+    def evaluate(cls, operand: Any, param: Any) -> Any:
+        pass


 class Flow_to_stock(Unary):
-
+
+    @classmethod
+    def py_op(cls, x: Any) -> Any:
+        return x.cumsum().fillna(x)


 class Stock_to_flow(Unary):
-
+
+    @classmethod
+    def py_op(cls, x: Any) -> Any:
+        return x.diff().fillna(x)


 class Fill_time_series(Binary):
@@ -186,6 +233,8 @@ class Fill_time_series(Binary):
     @classmethod
     def evaluate(cls, operand: Dataset, fill_type: str) -> Dataset:
         result = cls.validate(operand, fill_type)
+        if operand.data is None:
+            operand.data = pd.DataFrame()
         result.data = operand.data.copy()
         result.data[cls.time_id] = result.data[cls.time_id].astype(str)
         if len(result.data) < 2:
@@ -195,16 +244,21 @@ class Fill_time_series(Binary):
             result.data = cls.fill_periods(result.data, fill_type)
         elif data_type == Date:
             frequencies = cls.get_frequencies(operand.data[cls.time_id].apply(cls.parse_date))
-            result.data = cls.fill_dates(
-
+            result.data = cls.fill_dates(
+                result.data, fill_type, cls.find_min_frequency(frequencies)
+            )
         elif data_type == TimeInterval:
             frequencies = result.data[cls.time_id].apply(cls.get_frequency_from_time).unique()
             if len(frequencies) > 1:
-                raise SemanticError(
-
+                raise SemanticError(
+                    "1-1-19-9",
+                    op=cls.op,
+                    comp_type="dataset",
+                    param="single time interval frequency",
+                )
             result.data = cls.fill_time_intervals(result.data, fill_type, frequencies[0])
         else:
-            raise SemanticError("1-1-19-
+            raise SemanticError("1-1-19-2", op=cls.op)
         return result

     @classmethod
@@ -216,50 +270,48 @@ class Fill_time_series(Binary):
         cls.measures = operand.get_measures_names()
         if cls.time_id is None:
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
-        if fill_type not in [
-            fill_type =
-        return Dataset(name=
+        if fill_type not in ["all", "single"]:
+            fill_type = "all"
+        return Dataset(name="result", components=operand.components.copy(), data=None)

     @classmethod
-    def max_min_from_period(cls, data, mode=
+    def max_min_from_period(cls, data: pd.DataFrame, mode: str = "all") -> Dict[str, Any]:

+        result_dict: Dict[Any, Any] = {}
         data = data.assign(
             Periods_col=data[cls.time_id].apply(cls._get_period),
             Periods_values_col=data[cls.time_id].apply(
-                lambda x: int(re.sub(r
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        elif mode == 'single':
-            result_dict = {}
-            for name, group in data.groupby(cls.other_ids + ['Periods_col']):
+                lambda x: int(re.sub(r"[^\d]", "", x.split("-")[-1]))
+            ),
+            Year_values_col=data[cls.time_id].apply(lambda x: int(x.split("-")[0])),
+        ).sort_values(by=["Year_values_col", "Periods_col", "Periods_values_col"])
+
+        if mode == "all":
+            min_year = data["Year_values_col"].min()
+            max_year = data["Year_values_col"].max()
+            result_dict = {"min": {"A": min_year}, "max": {"A": max_year}}
+            for period, group in data.groupby("Periods_col"):
+                if period != "A":
+                    result_dict["min"][period] = group["Periods_values_col"].min()
+                    result_dict["max"][period] = group["Periods_values_col"].max()
+
+        elif mode == "single":
+            for name, group in data.groupby(cls.other_ids + ["Periods_col"]):
                 key = name[:-1] if len(name[:-1]) > 1 else name[0]
                 period = name[-1]
                 if key not in result_dict:
                     result_dict[key] = {
-
-
+                        "min": {"A": group["Year_values_col"].min()},
+                        "max": {"A": group["Year_values_col"].max()},
                     }
-                if period !=
-                    year_min = group[
-                    year_max = group[
+                if period != "A":
+                    year_min = group["Year_values_col"].min()
+                    year_max = group["Year_values_col"].max()

-                    result_dict[key][
-                    result_dict[key][
-                    result_dict[key][
-                    result_dict[key][
+                    result_dict[key]["min"]["A"] = min(result_dict[key]["min"]["A"], year_min)
+                    result_dict[key]["max"]["A"] = max(result_dict[key]["max"]["A"], year_max)
+                    result_dict[key]["min"][period] = group["Periods_values_col"].min()
+                    result_dict[key]["max"][period] = group["Periods_values_col"].max()

         else:
             raise ValueError("Mode must be either 'all' or 'single'")
@@ -267,189 +319,226 @@ class Fill_time_series(Binary):

     @classmethod
     def fill_periods(cls, data: pd.DataFrame, fill_type: str) -> pd.DataFrame:
-        result_data = cls.period_filler(data, single=(fill_type !=
+        result_data = cls.period_filler(data, single=(fill_type != "all"))
         not_na = result_data[cls.measures].notna().any(axis=1)
         duplicated = result_data.duplicated(subset=(cls.other_ids + [cls.time_id]), keep=False)
         return result_data[~duplicated | not_na]

     @classmethod
-    def period_filler(cls, data: pd.DataFrame, single=False) -> pd.DataFrame:
+    def period_filler(cls, data: pd.DataFrame, single: bool = False) -> pd.DataFrame:
         filled_data = []
-        MAX_MIN = cls.max_min_from_period(data, mode=
-        cls.periods =
-            MAX_MIN[
+        MAX_MIN = cls.max_min_from_period(data, mode="single" if single else "all")
+        cls.periods = (
+            list(MAX_MIN[list(MAX_MIN.keys())[0]]["min"].keys())
+            if single
+            else list(MAX_MIN["min"].keys())
+        )
         groups = data.groupby(cls.other_ids)

         for group, group_df in groups:
-            period_limits =
-
+            period_limits = (
+                MAX_MIN if not single else MAX_MIN[group if len(group) > 1 else group[0]]
+            )
+            years = list(range(period_limits["min"]["A"], period_limits["max"]["A"] + 1))
             for period in cls.periods:
-                if period ==
+                if period == "A":
                     filled_data.extend(cls.fill_periods_rows(group_df, period, years))
                 else:
-                    if period in period_limits[
-                        vals =
+                    if period in period_limits["min"] and period in period_limits["max"]:
+                        vals = list(
+                            range(period_limits["min"][period], period_limits["max"][period] + 1)
+                        )
                         filled_data.extend(
-                            cls.fill_periods_rows(group_df, period, years, vals=vals)
+                            cls.fill_periods_rows(group_df, period, years, vals=vals)
+                        )

         filled_data = pd.concat(filled_data, ignore_index=True)
-        combined_data = pd.concat([filled_data, data], ignore_index=True)
-        if len(cls.periods) == 1 and cls.periods[0] ==
+        combined_data = pd.concat([filled_data, data], ignore_index=True)  # type: ignore[list-item]
+        if len(cls.periods) == 1 and cls.periods[0] == "A":
            combined_data[cls.time_id] = combined_data[cls.time_id].astype(int)
         else:
             combined_data[cls.time_id] = combined_data[cls.time_id].astype(str)
         return combined_data.sort_values(by=cls.other_ids + [cls.time_id])

     @classmethod
-    def fill_periods_rows(
+    def fill_periods_rows(
+        cls, group_df: Any, period: str, years: List[int], vals: Optional[List[int]] = None
+    ) -> List[Any]:
         rows = []
         for year in years:
-            if period ==
+            if period == "A":
                 rows.append(cls.create_period_row(group_df, period, year))
-
+            elif vals is not None:
                 for val in vals:
                     rows.append(cls.create_period_row(group_df, period, year, val=val))
         return rows

     @classmethod
-    def create_period_row(
+    def create_period_row(
+        cls, group_df: Any, period: str, year: int, val: Optional[int] = None
+    ) -> Any:
         row = group_df.iloc[0].copy()
-        row[cls.time_id] = f"{year}" if period ==
+        row[cls.time_id] = f"{year}" if period == "A" else f"{year}-{period}{val:d}"
         row[cls.measures] = None
         return row.to_frame().T

     @classmethod
-    def max_min_from_date(cls, data, fill_type=
-        def compute_min_max(group):
+    def max_min_from_date(cls, data: pd.DataFrame, fill_type: str = "all") -> Dict[str, Any]:
+        def compute_min_max(group: Any) -> Dict[str, Any]:
             min_date = cls.parse_date(group.min())
             max_date = cls.parse_date(group.max())
             date_format = cls.get_date_format(max_date)
-            return {
+            return {"min": min_date, "max": max_date, "date_format": date_format}

-        if fill_type ==
+        if fill_type == "all":
             return compute_min_max(data[cls.time_id])

         grouped = data.groupby(cls.other_ids)
-        result_dict = {
-
-
+        result_dict = {
+            name if len(name) > 1 else name[0]: compute_min_max(group[cls.time_id])
+            for name, group in grouped
+        }
         return result_dict

     @classmethod
-    def fill_dates(cls, data: pd.DataFrame, fill_type, min_frequency) -> pd.DataFrame:
+    def fill_dates(cls, data: pd.DataFrame, fill_type: str, min_frequency: str) -> pd.DataFrame:
         result_data = cls.date_filler(data, fill_type, min_frequency)
         not_na = result_data[cls.measures].notna().any(axis=1)
         duplicated = result_data.duplicated(subset=(cls.other_ids + [cls.time_id]), keep=False)
         return result_data[~duplicated | not_na]

     @classmethod
-    def date_filler(cls, data: pd.DataFrame, fill_type, min_frequency) -> pd.DataFrame:
+    def date_filler(cls, data: pd.DataFrame, fill_type: str, min_frequency: str) -> pd.DataFrame:
         MAX_MIN = cls.max_min_from_date(data, fill_type)
         date_format = None
         filled_data = []

-        def create_filled_dates(
-
+        def create_filled_dates(
+            group: Any, min_max: Dict[str, Any]
+        ) -> (pd.DataFrame, str):  # type: ignore[syntax]
+            date_range = pd.date_range(start=min_max["min"], end=min_max["max"], freq=min_frequency)
             date_df = pd.DataFrame(date_range, columns=[cls.time_id])
             date_df[cls.other_ids] = group.iloc[0][cls.other_ids]
             date_df[cls.measures] = None
-            return date_df, min_max[
+            return date_df, min_max["date_format"]

         for name, group in data.groupby(cls.other_ids):
-            min_max = MAX_MIN if fill_type ==
+            min_max = MAX_MIN if fill_type == "all" else MAX_MIN[name if len(name) > 1 else name[0]]
             filled_dates, date_format = create_filled_dates(group, min_max)
             filled_data.append(filled_dates)

         filled_data = pd.concat(filled_data, ignore_index=True)
         filled_data[cls.time_id] = filled_data[cls.time_id].dt.strftime(date_format)
-        combined_data = pd.concat([filled_data, data], ignore_index=True)
+        combined_data = pd.concat([filled_data, data], ignore_index=True)  # type: ignore[list-item]
         combined_data[cls.time_id] = combined_data[cls.time_id].astype(str)
         return combined_data.sort_values(by=cls.other_ids + [cls.time_id])

     @classmethod
-    def max_min_from_time(cls, data, fill_type=
-        data = data.applymap(str).sort_values(
+    def max_min_from_time(cls, data: pd.DataFrame, fill_type: str = "all") -> Dict[str, Any]:
+        data = data.applymap(str).sort_values(  # type: ignore[operator]
+            by=cls.other_ids + [cls.time_id]
+        )

-        def extract_max_min(group):
-            start_dates = group.apply(lambda x: x.split(
-            end_dates = group.apply(lambda x: x.split(
-            return {
-
+        def extract_max_min(group: Any) -> Dict[str, Any]:
+            start_dates = group.apply(lambda x: x.split("/")[0])
+            end_dates = group.apply(lambda x: x.split("/")[1])
+            return {
+                "start": {"min": start_dates.min(), "max": start_dates.max()},
+                "end": {"min": end_dates.min(), "max": end_dates.max()},
+            }

-        if fill_type ==
+        if fill_type == "all":
             return extract_max_min(data[cls.time_id])
         else:
-            return {
-
+            return {
+                name: extract_max_min(group[cls.time_id])
+                for name, group in data.groupby(cls.other_ids)
+            }

     @classmethod
-    def fill_time_intervals(
-
+    def fill_time_intervals(
+        cls, data: pd.DataFrame, fill_type: str, frequency: str
+    ) -> pd.DataFrame:
         result_data = cls.time_filler(data, fill_type, frequency)
         not_na = result_data[cls.measures].notna().any(axis=1)
         duplicated = result_data.duplicated(subset=(cls.other_ids + [cls.time_id]), keep=False)
         return result_data[~duplicated | not_na]

     @classmethod
-    def time_filler(cls, data: pd.DataFrame, fill_type,
-                    frequency) -> pd.DataFrame:
+    def time_filler(cls, data: pd.DataFrame, fill_type: str, frequency: str) -> pd.DataFrame:
         MAX_MIN = cls.max_min_from_time(data, fill_type)

-        def fill_group(group_df):
+        def fill_group(group_df: pd.DataFrame) -> pd.DataFrame:
             group_key = group_df.iloc[0][cls.other_ids].values
-            if fill_type !=
+            if fill_type != "all":
                 group_key = group_key[0] if len(group_key) == 1 else tuple(group_key)
-            group_dict = MAX_MIN if fill_type ==
+            group_dict = MAX_MIN if fill_type == "all" else MAX_MIN[group_key]

-            intervals = [
-
+            intervals = [
+                f"{group_dict['start']['min']}/{group_dict['end']['min']}",
+                f"{group_dict['start']['max']}/{group_dict['end']['max']}",
+            ]
             for interval in intervals:
                 if interval not in group_df[cls.time_id].values:
                     empty_row = group_df.iloc[0].copy()
                     empty_row[cls.time_id] = interval
                     empty_row[cls.measures] = None
-                    group_df = group_df.append(
+                    group_df = group_df.append(  # type: ignore[operator]
+                        empty_row, ignore_index=True
+                    )
             start_group_df = group_df.copy()
             start_group_df[cls.time_id] = start_group_df[cls.time_id].apply(
-                lambda x: x.split(
+                lambda x: x.split("/")[0]
+            )
             end_group_df = group_df.copy()
-            end_group_df[cls.time_id] = end_group_df[cls.time_id].apply(lambda x: x.split(
+            end_group_df[cls.time_id] = end_group_df[cls.time_id].apply(lambda x: x.split("/")[1])
             start_filled = cls.date_filler(start_group_df, fill_type, frequency)
             end_filled = cls.date_filler(end_group_df, fill_type, frequency)
-            start_filled[cls.time_id] = start_filled[cls.time_id].str.cat(
-
+            start_filled[cls.time_id] = start_filled[cls.time_id].str.cat(
+                end_filled[cls.time_id], sep="/"
+            )
             return start_filled

         filled_data = [fill_group(group_df) for _, group_df in data.groupby(cls.other_ids)]
-        return
-
+        return (
+            pd.concat(filled_data, ignore_index=True)
+            .sort_values(by=cls.other_ids + [cls.time_id])
+            .drop_duplicates()
+        )


 class Time_Shift(Binary):
     op = TIMESHIFT

     @classmethod
-    def evaluate(cls, operand: Dataset, shift_value:
+    def evaluate(cls, operand: Dataset, shift_value: Any) -> Dataset:
         result = cls.validate(operand, shift_value)
-        result.data = operand.data.copy()
+        result.data = operand.data.copy() if operand.data is not None else pd.DataFrame()
         shift_value = int(shift_value.value)
         cls.time_id = cls._get_time_id(result)
-
+
+        data_type: Any = (
+            result.components[cls.time_id].data_type if isinstance(cls.time_id, str) else None
+        )

         if data_type == Date:
             freq = cls.find_min_frequency(
-                cls.get_frequencies(
-
-
+                cls.get_frequencies(
+                    result.data[cls.time_id].map(cls.parse_date, na_action="ignore")
+                )
+            )
+            result.data[cls.time_id] = cls.shift_dates(result.data[cls.time_id], shift_value, freq)
         elif data_type == Time:
             freq = cls.get_frequency_from_time(result.data[cls.time_id].iloc[0])
             result.data[cls.time_id] = result.data[cls.time_id].apply(
-                lambda x: cls.shift_interval(x, shift_value, freq)
+                lambda x: cls.shift_interval(x, shift_value, freq)
+            )
         elif data_type == TimePeriod:
             periods = result.data[cls.time_id].apply(cls._get_period).unique()
             result.data[cls.time_id] = result.data[cls.time_id].apply(
-                lambda x: cls.shift_period(x, shift_value)
-
+                lambda x: cls.shift_period(x, shift_value)
+            )
+            if len(periods) == 1 and periods[0] == "A":
                 result.data[cls.time_id] = result.data[cls.time_id].astype(int)
         else:
             raise SemanticError("1-1-19-2", op=cls.op)
@@ -457,27 +546,41 @@ class Time_Shift(Binary):

     @classmethod
     def validate(cls, operand: Dataset, shift_value: str) -> Dataset:
-        if
+        if cls._get_time_id(operand) is None:
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
-
-        return Dataset(name='result', components=operand.components.copy(), data=None)
+        return Dataset(name="result", components=operand.components.copy(), data=None)

     @classmethod
-    def
-
+    def shift_dates(cls, dates: Any, shift_value: int, frequency: str) -> Any:
+        dates = pd.to_datetime(dates)
+        if frequency == "D":
+            return dates + pd.to_timedelta(shift_value, unit="D")
+        elif frequency == "W":
+            return dates + pd.to_timedelta(shift_value, unit="W")
+        elif frequency == "Y":
+            return dates + pd.DateOffset(years=shift_value)
+        elif frequency in ["M", "Q", "S"]:
+            return dates + pd.DateOffset(months=shift_value)
+        raise SemanticError("2-1-19-2", period=frequency)

     @classmethod
-    def shift_period(
+    def shift_period(
+        cls, period_str: str, shift_value: int, frequency: Optional[int] = None
+    ) -> str:
         period_type = cls._get_period(period_str)

-        if period_type ==
+        if period_type == "A":
             return str(int(period_str) + shift_value)

         if frequency:
             shift_value *= frequency

         tp_value = TimePeriodHandler(period_str)
-        year, period, value =
+        year, period, value = (
+            tp_value.year,
+            tp_value.period_indicator,
+            tp_value.period_number + shift_value,
+        )
         period_limit = cls.YEAR_TO_PERIOD[period]

         if value <= 0:
@@ -490,23 +593,23 @@ class Time_Shift(Binary):
         return f"{year}-{period}{value}"

     @classmethod
-    def shift_interval(cls, interval, shift_value, frequency):
-        start_date, end_date = interval.split(
-        start_date = cls.
-        end_date = cls.
-        return f
+    def shift_interval(cls, interval: str, shift_value: Any, frequency: str) -> str:
+        start_date, end_date = interval.split("/")
+        start_date = cls.shift_dates(start_date, shift_value, frequency)
+        end_date = cls.shift_dates(end_date, shift_value, frequency)
+        return f"{start_date}/{end_date}"


 class Time_Aggregation(Time):
     op = TIME_AGG

     @classmethod
-    def _check_duration(cls, value: str):
+    def _check_duration(cls, value: str) -> None:
         if value not in DURATION_MAPPING:
             raise SemanticError("1-1-19-3", op=cls.op, param="duration")

     @classmethod
-    def _check_params(cls, period_from: Optional[str], period_to: str):
+    def _check_params(cls, period_from: Optional[str], period_to: str) -> None:
         cls._check_duration(period_to)
         if period_from is not None:
             cls._check_duration(period_from)
@@ -515,8 +618,9 @@ class Time_Aggregation(Time):
             raise SemanticError("1-1-19-4", op=cls.op, value_1=period_from, value_2=period_to)

     @classmethod
-    def dataset_validation(
-
+    def dataset_validation(
+        cls, operand: Dataset, period_from: Optional[str], period_to: str, conf: str
+    ) -> Dataset:
         # TODO: Review with VTL TF as this makes no sense

         count_time_types = 0
@@ -526,29 +630,34 @@ class Time_Aggregation(Time):
             if measure.data_type == TimePeriod and period_to == "D":
                 raise SemanticError("1-1-19-5", op=cls.op)
             if measure.data_type == TimeInterval:
-                raise SemanticError("1-1-19-6", op=cls.op,
-                                    comp=measure.name)
+                raise SemanticError("1-1-19-6", op=cls.op, comp=measure.name)

         count_time_types = 0
         for id_ in operand.get_identifiers():
             if id_.data_type in cls.TIME_DATA_TYPES:
                 count_time_types += 1
         if count_time_types != 1:
-            raise SemanticError(
-
+            raise SemanticError(
+                "1-1-19-9", op=cls.op, comp_type="dataset", param="single time identifier"
+            )

         if count_time_types != 1:
-            raise SemanticError(
-
+            raise SemanticError(
+                "1-1-19-9", op=cls.op, comp_type="dataset", param="single time measure"
+            )

-        result_components = {
-
+        result_components = {
+            comp.name: comp
+            for comp in operand.components.values()
+            if comp.role in [Role.IDENTIFIER, Role.MEASURE]
+        }

         return Dataset(name=operand.name, components=result_components, data=None)

     @classmethod
-    def component_validation(
-
+    def component_validation(
+        cls, operand: DataComponent, period_from: Optional[str], period_to: str, conf: str
+    ) -> DataComponent:
         if operand.data_type not in cls.TIME_DATA_TYPES:
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time component")
         if operand.data_type == TimePeriod and period_to == "D":
@@ -559,16 +668,23 @@ class Time_Aggregation(Time):
         return DataComponent(name=operand.name, data_type=operand.data_type, data=None)

     @classmethod
-    def scalar_validation(
-
+    def scalar_validation(
+        cls, operand: Scalar, period_from: Optional[str], period_to: str, conf: str
+    ) -> Scalar:
         if operand.data_type not in cls.TIME_DATA_TYPES:
             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time scalar")

         return Scalar(name=operand.name, data_type=operand.data_type, value=None)

     @classmethod
-    def _execute_time_aggregation(
-
+    def _execute_time_aggregation(
+        cls,
+        value: str,
+        data_type: Type[ScalarType],
+        period_from: Optional[str],
+        period_to: str,
+        conf: str,
+    ) -> str:
         if data_type == TimePeriod:  # Time period
             return _time_period_access(value, period_to)

@@ -585,40 +701,53 @@ class Time_Aggregation(Time):
             raise NotImplementedError

     @classmethod
-    def dataset_evaluation(
-
+    def dataset_evaluation(
+        cls, operand: Dataset, period_from: Optional[str], period_to: str, conf: str
+    ) -> Dataset:
         result = cls.dataset_validation(operand, period_from, period_to, conf)
-        result.data = operand.data.copy()
+        result.data = operand.data.copy() if operand.data is not None else pd.DataFrame()
         time_measure = [m for m in operand.get_measures() if m.data_type in cls.TIME_DATA_TYPES][0]
         result.data[time_measure.name] = result.data[time_measure.name].map(
-            lambda x: cls._execute_time_aggregation(
-
-
+            lambda x: cls._execute_time_aggregation(
+                x, time_measure.data_type, period_from, period_to, conf
+            ),
+            na_action="ignore",
+        )

         return result

     @classmethod
-    def component_evaluation(
-
-
+    def component_evaluation(
+        cls, operand: DataComponent, period_from: Optional[str], period_to: str, conf: str
+    ) -> DataComponent:
         result = cls.component_validation(operand, period_from, period_to, conf)
-
-
-
-
+        if operand.data is not None:
+            result.data = operand.data.map(
+                lambda x: cls._execute_time_aggregation(
+                    x, operand.data_type, period_from, period_to, conf
+                ),
+                na_action="ignore",
+            )
         return result

     @classmethod
-    def scalar_evaluation(
-
+    def scalar_evaluation(
+        cls, operand: Scalar, period_from: Optional[str], period_to: str, conf: str
+    ) -> Scalar:
         result = cls.scalar_validation(operand, period_from, period_to, conf)
-        result.value = cls._execute_time_aggregation(
-
+        result.value = cls._execute_time_aggregation(
+            operand.value, operand.data_type, period_from, period_to, conf
+        )
         return result

     @classmethod
-    def validate(
-
+    def validate(
+        cls,
+        operand: Union[Dataset, DataComponent, Scalar],
+        period_from: Optional[str],
+        period_to: str,
+        conf: str,
+    ) -> Union[Dataset, DataComponent, Scalar]:
         cls._check_params(period_from, period_to)
         if isinstance(operand, Dataset):
             return cls.dataset_validation(operand, period_from, period_to, conf)
@@ -628,9 +757,13 @@ class Time_Aggregation(Time):
             return cls.scalar_validation(operand, period_from, period_to, conf)

     @classmethod
-    def evaluate(
-
-
+    def evaluate(
+        cls,
+        operand: Union[Dataset, DataComponent, Scalar],
+        period_from: Optional[str],
+        period_to: str,
+        conf: str,
+    ) -> Union[Dataset, DataComponent, Scalar]:
         cls._check_params(period_from, period_to)
         if isinstance(operand, Dataset):
             return cls.dataset_evaluation(operand, period_from, period_to, conf)
@@ -640,7 +773,7 @@ class Time_Aggregation(Time):
             return cls.scalar_evaluation(operand, period_from, period_to, conf)


-def _time_period_access(v:
+def _time_period_access(v: Any, to_param: str) -> Any:
     v = TimePeriodHandler(v)
     if v.period_indicator == to_param:
         return str(v)
@@ -648,7 +781,7 @@ def _time_period_access(v: str, to_param: str) -> str:
         return str(v)


-def _date_access(v, to_param, start: bool):
+def _date_access(v: str, to_param: str, start: bool) -> Any:
     period_value = date_to_period(date.fromisoformat(v), to_param)
     if start:
         return period_value.start_date()
@@ -658,11 +791,130 @@ def _date_access(v, to_param, start: bool):
 class Current_Date(Time):

     @classmethod
-    def validate(cls):
-        return Scalar(name=
+    def validate(cls) -> Scalar:
+        return Scalar(name="current_date", data_type=Date, value=None)

     @classmethod
-    def evaluate(cls):
+    def evaluate(cls) -> Scalar:
         result = cls.validate()
         result.value = date.today().isoformat()
         return result
+
+
+class Date_Diff(Binary):
+
+    @classmethod
+    def evaluate(cls, Date1: Any, Date2: Any) -> Any:
+        # TODO: Implement this method (or adapt Binary's validate method to work with this operator)
+        pass
+
+    @classmethod
+    def validate(cls, Date1: Any, Date2: Any) -> Any:
+        pass
+
+
+class Date_Add(Parametrized):
+    @classmethod
+    def evaluate(cls, operand: Any, param_list: List[Any]) -> Any:
+        # TODO: Implement this method (or adapt Binary's validate method to work with this operator)
+        pass
+
+    @classmethod
+    def validate(cls, operand: Any, param_list: List[Any]) -> Any:
+        pass
+
+
+class Year(Unary):
+
+    @classmethod
+    def validate(cls, operand: Any) -> Any:
+        # TODO: Implement this method (or adapt Unary's validate method to work with this operator)
+        pass
+
+    @classmethod
+    def py_op(cls, x: Any) -> Any:
+        pass
+
+
+class Month(Unary):
+
+    @classmethod
+    def validate(cls, operand: Any) -> Any:
+        # TODO: Implement this method (or adapt Unary's validate method to work with this operator)
+        pass
+
+    @classmethod
+    def py_op(cls, x: Any) -> Any:
+        pass
+
+
+class Day_of_Month(Unary):
+
+    @classmethod
+    def validate(cls, operand: Any) -> Any:
+        # TODO: Implement this method (or adapt Unary's validate method to work with this operator)
+        pass
+
+    @classmethod
+    def py_op(cls, x: Any) -> Any:
+        pass
+
+
+class Day_of_Year(Unary):
+
+    @classmethod
+    def validate(cls, operand: Any) -> Any:
+        # TODO: Implement this method (or adapt Unary's validate method to work with this operator)
+        pass
+
+    @classmethod
+    def py_op(cls, x: Any) -> Any:
+        pass
+
+
+class Day_to_Year(Unary):
+
+    @classmethod
+    def validate(cls, operand: Any) -> Any:
+        # TODO: Implement this method (or adapt Unary's validate method to work with this operator)
+        pass
+
+    @classmethod
+    def py_op(cls, x: Any) -> Any:
+        pass
+
+
+class Day_to_Month(Unary):
+
+    @classmethod
+    def validate(cls, operand: Any) -> Any:
+        # TODO: Implement this method (or adapt Unary's validate method to work with this operator)
+        pass
+
+    @classmethod
+    def py_op(cls, x: Any) -> Any:
+        pass
+
+
+class Year_to_Day(Unary):
+
+    @classmethod
+    def validate(cls, operand: Any) -> Any:
+        # TODO: Implement this method (or adapt Unary's validate method to work with this operator)
+        pass
+
+    @classmethod
+    def py_op(cls, x: Any) -> Any:
+        pass
+
+
+class Month_to_Day(Unary):
+
+    @classmethod
+    def validate(cls, operand: Any) -> Any:
+        # TODO: Implement this method (or adapt Unary's validate method to work with this operator)
+        pass
+
+    @classmethod
+    def py_op(cls, x: Any) -> Any:
+        pass