vtlengine 1.4.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vtlengine/API/_InternalApi.py +791 -0
- vtlengine/API/__init__.py +612 -0
- vtlengine/API/data/schema/external_routines_schema.json +34 -0
- vtlengine/API/data/schema/json_schema_2.1.json +116 -0
- vtlengine/API/data/schema/value_domain_schema.json +97 -0
- vtlengine/AST/ASTComment.py +57 -0
- vtlengine/AST/ASTConstructor.py +598 -0
- vtlengine/AST/ASTConstructorModules/Expr.py +1928 -0
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +995 -0
- vtlengine/AST/ASTConstructorModules/Terminals.py +790 -0
- vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- vtlengine/AST/ASTDataExchange.py +10 -0
- vtlengine/AST/ASTEncoders.py +32 -0
- vtlengine/AST/ASTString.py +675 -0
- vtlengine/AST/ASTTemplate.py +558 -0
- vtlengine/AST/ASTVisitor.py +25 -0
- vtlengine/AST/DAG/__init__.py +479 -0
- vtlengine/AST/DAG/_words.py +10 -0
- vtlengine/AST/Grammar/Vtl.g4 +705 -0
- vtlengine/AST/Grammar/VtlTokens.g4 +409 -0
- vtlengine/AST/Grammar/__init__.py +0 -0
- vtlengine/AST/Grammar/lexer.py +2139 -0
- vtlengine/AST/Grammar/parser.py +16597 -0
- vtlengine/AST/Grammar/tokens.py +169 -0
- vtlengine/AST/VtlVisitor.py +824 -0
- vtlengine/AST/__init__.py +674 -0
- vtlengine/DataTypes/TimeHandling.py +562 -0
- vtlengine/DataTypes/__init__.py +863 -0
- vtlengine/DataTypes/_time_checking.py +135 -0
- vtlengine/Exceptions/__exception_file_generator.py +96 -0
- vtlengine/Exceptions/__init__.py +159 -0
- vtlengine/Exceptions/messages.py +1004 -0
- vtlengine/Interpreter/__init__.py +2048 -0
- vtlengine/Model/__init__.py +501 -0
- vtlengine/Operators/Aggregation.py +357 -0
- vtlengine/Operators/Analytic.py +455 -0
- vtlengine/Operators/Assignment.py +23 -0
- vtlengine/Operators/Boolean.py +106 -0
- vtlengine/Operators/CastOperator.py +451 -0
- vtlengine/Operators/Clause.py +366 -0
- vtlengine/Operators/Comparison.py +488 -0
- vtlengine/Operators/Conditional.py +495 -0
- vtlengine/Operators/General.py +191 -0
- vtlengine/Operators/HROperators.py +254 -0
- vtlengine/Operators/Join.py +447 -0
- vtlengine/Operators/Numeric.py +422 -0
- vtlengine/Operators/RoleSetter.py +77 -0
- vtlengine/Operators/Set.py +176 -0
- vtlengine/Operators/String.py +578 -0
- vtlengine/Operators/Time.py +1144 -0
- vtlengine/Operators/Validation.py +275 -0
- vtlengine/Operators/__init__.py +900 -0
- vtlengine/Utils/__Virtual_Assets.py +34 -0
- vtlengine/Utils/__init__.py +479 -0
- vtlengine/__extras_check.py +17 -0
- vtlengine/__init__.py +27 -0
- vtlengine/files/__init__.py +0 -0
- vtlengine/files/output/__init__.py +35 -0
- vtlengine/files/output/_time_period_representation.py +55 -0
- vtlengine/files/parser/__init__.py +240 -0
- vtlengine/files/parser/_rfc_dialect.py +22 -0
- vtlengine/py.typed +0 -0
- vtlengine-1.4.0rc2.dist-info/METADATA +89 -0
- vtlengine-1.4.0rc2.dist-info/RECORD +66 -0
- vtlengine-1.4.0rc2.dist-info/WHEEL +4 -0
- vtlengine-1.4.0rc2.dist-info/licenses/LICENSE.md +661 -0
vtlengine/Operators/Time.py
@@ -0,0 +1,1144 @@
import re
from datetime import date, datetime, timedelta
from typing import Any, Dict, List, Optional, Type, Union

import pandas as pd

import vtlengine.Operators as Operators
from vtlengine.AST.Grammar.tokens import (
    DATE_ADD,
    DATEDIFF,
    DAYOFMONTH,
    DAYOFYEAR,
    DAYTOMONTH,
    DAYTOYEAR,
    FILL_TIME_SERIES,
    FLOW_TO_STOCK,
    MONTH,
    MONTHTODAY,
    PERIOD_INDICATOR,
    TIME_AGG,
    TIMESHIFT,
    YEAR,
    YEARTODAY,
)
from vtlengine.DataTypes import (
    Date,
    Duration,
    Integer,
    ScalarType,
    String,
    TimeInterval,
    TimePeriod,
    unary_implicit_promotion,
)
from vtlengine.DataTypes.TimeHandling import (
    PERIOD_IND_MAPPING,
    TimePeriodHandler,
    date_to_period,
    period_to_date,
)
from vtlengine.Exceptions import RunTimeError, SemanticError
from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
from vtlengine.Utils.__Virtual_Assets import VirtualCounter

class Time(Operators.Operator):
    periods: Any
    time_id: Any
    other_ids: Any
    measures: Any

    TIME_DATA_TYPES = [Date, TimePeriod, TimeInterval]

    FREQUENCY_MAP = {"Y": "years", "M": "months", "D": "days"}
    YEAR_TO_PERIOD = {"S": 2, "Q": 4, "M": 12, "W": 52, "D": 365}
    PERIOD_ORDER = {"A": 0, "S": 1, "Q": 2, "M": 3, "W": 4, "D": 5}

    op = FLOW_TO_STOCK

    @classmethod
    def _get_time_id(cls, operand: Dataset) -> str:
        reference_id = None
        identifiers = operand.get_identifiers()
        if len(identifiers) == 0:
            raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
        for id in operand.get_identifiers():
            if id.data_type in cls.TIME_DATA_TYPES:
                if reference_id is not None:
                    raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
                reference_id = id.name
        if reference_id is None:
            raise SemanticError("1-1-19-1", op=cls.op, data_type="Time_Period", comp="identifier")
        return str(reference_id)

    @classmethod
    def sort_by_time(cls, operand: Dataset) -> Optional[pd.DataFrame]:
        time_id = cls._get_time_id(operand)
        if time_id is None:
            return None
        ids = [id.name for id in operand.get_identifiers() if id.name != time_id]
        ids.append(time_id)
        if operand.data is None:
            return None
        return operand.data.sort_values(by=ids).reset_index(drop=True)

    @classmethod
    def _get_period(cls, value: str) -> str:
        tp_value = TimePeriodHandler(value)
        return tp_value.period_indicator

    @classmethod
    def parse_date(cls, date_str: str) -> date:
        return date.fromisoformat(date_str)

    @classmethod
    def get_frequencies(cls, dates: Any) -> Any:
        dates = pd.to_datetime(dates)
        dates = dates.sort_values()
        deltas = dates.diff().dropna()
        return deltas

    @classmethod
    def find_min_frequency(cls, differences: Any) -> str:
        months_deltas = differences.apply(lambda x: x.days // 30)
        days_deltas = differences.apply(lambda x: x.days)
        min_months = min(
            (diff for diff in months_deltas if diff > 0 and diff % 12 != 0),
            default=None,
        )
        min_days = min(
            (diff for diff in days_deltas if diff > 0 and diff % 365 != 0 and diff % 366 != 0),
            default=None,
        )
        return "D" if min_days else "M" if min_months else "Y"

    @classmethod
    def get_frequency_from_time(cls, interval: str) -> Any:
        start_date, end_date = interval.split("/")
        return date.fromisoformat(end_date) - date.fromisoformat(start_date)

    @classmethod
    def get_date_format(cls, date_str: Union[str, date]) -> str:
        date = cls.parse_date(date_str) if isinstance(date_str, str) else date_str
        return "%Y-%m-%d" if date.day >= 1 else "%Y-%m" if date.month >= 1 else "%Y"


class Unary(Time):
    @classmethod
    def validate(cls, operand: Any) -> Any:
        dataset_name = VirtualCounter._new_ds_name()
        if not isinstance(operand, Dataset):
            raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
        if cls._get_time_id(operand) is None:
            raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
        operand.data = cls.sort_by_time(operand)
        return Dataset(name=dataset_name, components=operand.components.copy(), data=None)

    @classmethod
    def evaluate(cls, operand: Any) -> Any:
        result = cls.validate(operand)
        result.data = operand.data.copy() if operand.data is not None else pd.DataFrame()
        if len(operand.data) < 2:
            return result

        cls.time_id = cls._get_time_id(result)
        cls.other_ids = [id.name for id in result.get_identifiers() if id.name != cls.time_id]
        measure_names = result.get_measures_names()

        data_type = result.components[cls.time_id].data_type

        result.data = result.data.sort_values(by=cls.other_ids + [cls.time_id])
        if data_type == TimePeriod:
            result.data = cls._period_accumulation(result.data, measure_names)
        elif data_type in (Date, TimeInterval):
            result.data[measure_names] = (
                result.data.groupby(cls.other_ids)[measure_names]
                .apply(cls.py_op)
                .reset_index(drop=True)
            )
        else:
            raise SemanticError("1-1-19-8", op=cls.op, comp_type="dataset", param="date type")
        return result

    @classmethod
    def _period_accumulation(cls, data: pd.DataFrame, measure_names: List[str]) -> pd.DataFrame:
        data = data.copy()
        data["Period_group_col"] = (
            data[cls.time_id].apply(cls._get_period).apply(lambda x: cls.PERIOD_ORDER[x])
        )
        result = data.groupby(cls.other_ids + ["Period_group_col"], group_keys=False)[
            measure_names
        ].apply(cls.py_op)
        data[measure_names] = result.reset_index(drop=True)
        return data.drop(columns="Period_group_col")
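
`Unary.evaluate` applies `py_op` to each measure within a group of the non-time identifiers; for `TimePeriod` data, `_period_accumulation` additionally groups by the period indicator (via `PERIOD_ORDER`), so annual and quarterly rows accumulate independently. A plain-pandas sketch of that grouping (the indicator extraction is a simplified stand-in for `_get_period`; column names are hypothetical):

# --- sketch: per-indicator accumulation, plain pandas ---
df = pd.DataFrame({
    "Id_1": ["A", "A", "A", "A"],
    "TIME_PERIOD": ["2020", "2021", "2020-Q1", "2020-Q2"],
    "Me_1": [10, 5, 1, 2],
})
indicator = df["TIME_PERIOD"].map(lambda v: "A" if "-" not in v else v.split("-")[1][0])
df["grp"] = indicator.map({"A": 0, "S": 1, "Q": 2, "M": 3, "W": 4, "D": 5})
df["Me_1"] = df.groupby(["Id_1", "grp"])["Me_1"].cumsum()
# Annual rows become 10, 15; quarterly rows become 1, 3.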


class Binary(Time):
    pass


class Parameterized(Time):
    pass


class Period_indicator(Unary):
    op = PERIOD_INDICATOR

    @classmethod
    def validate(cls, operand: Any) -> Any:
        dataset_name = VirtualCounter._new_ds_name()
        if isinstance(operand, Dataset):
            time_id = cls._get_time_id(operand)
            if operand.components[time_id].data_type != TimePeriod:
                raise SemanticError("1-1-19-8", op=cls.op, comp_type="time period dataset")
            result_components = {
                comp.name: comp
                for comp in operand.components.values()
                if comp.role == Role.IDENTIFIER
            }
            result_components["duration_var"] = Component(
                name="duration_var",
                data_type=Duration,
                role=Role.MEASURE,
                nullable=True,
            )
            return Dataset(name=dataset_name, components=result_components, data=None)
        # DataComponent and Scalar validation
        if operand.data_type != TimePeriod:
            raise SemanticError("1-1-19-8", op=cls.op, comp_type="time period component")
        if isinstance(operand, DataComponent):
            return DataComponent(
                name=operand.name, data_type=Duration, data=None, nullable=operand.nullable
            )
        return Scalar(name=operand.name, data_type=Duration, value=None)

    @classmethod
    def evaluate(
        cls, operand: Union[Dataset, DataComponent, Scalar, str]
    ) -> Union[Dataset, DataComponent, Scalar, str]:
        result = cls.validate(operand)
        if isinstance(operand, str):
            return cls._get_period(str(operand))
        if isinstance(operand, Scalar):
            result.value = cls._get_period(str(operand.value))
            return result
        if isinstance(operand, DataComponent):
            if operand.data is not None:
                result.data = operand.data.map(cls._get_period, na_action="ignore")
            return result
        cls.time_id = cls._get_time_id(operand)
        result.data = (
            operand.data.copy()[result.get_identifiers_names()]
            if (operand.data is not None)
            else pd.Series()
        )
        period_series: Any = result.data[cls.time_id].map(cls._get_period)
        result.data["duration_var"] = period_series
        return result
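
`period_indicator` reduces every `TimePeriod` value to its indicator letter, exposed as a `duration_var` measure at dataset level. Indicative values, assuming `TimePeriodHandler` parses the usual `YYYY` / `YYYY-<ind><n>` forms:

# --- sketch: indicator extraction ---
Period_indicator._get_period("2022-Q1")   # -> "Q"
Period_indicator._get_period("2022-M03")  # -> "M"
Period_indicator._get_period("2022")      # -> "A", assuming bare years parse as annual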


class Parametrized(Time):
    @classmethod
    def validate(cls, operand: Any, param: Any) -> Any:
        pass

    @classmethod
    def evaluate(cls, operand: Any, param: Any) -> Any:
        pass


class Flow_to_stock(Unary):
    @classmethod
    def py_op(cls, x: Any) -> Any:
        return x.cumsum().fillna(x)


class Stock_to_flow(Unary):
    @classmethod
    def py_op(cls, x: Any) -> Any:
        return x.diff().fillna(x)
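
`flow_to_stock` and `stock_to_flow` are exact inverses at the series level: a running sum versus a first difference, each using `fillna` to keep the first value of a group intact:

# --- sketch: the two py_op callables are inverses ---
s = pd.Series([2.0, 4.0, 3.0])
stock = s.cumsum().fillna(s)         # 2.0, 6.0, 9.0  (flow_to_stock)
flow = stock.diff().fillna(stock)    # 2.0, 4.0, 3.0  (stock_to_flow)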


class Fill_time_series(Binary):
    op = FILL_TIME_SERIES

    @classmethod
    def evaluate(cls, operand: Dataset, fill_type: str) -> Dataset:
        result = cls.validate(operand, fill_type)
        if operand.data is None:
            operand.data = pd.DataFrame()
        result.data = operand.data.copy()
        result.data[cls.time_id] = result.data[cls.time_id].astype(str)
        if len(result.data) < 2:
            return result
        data_type = result.components[cls.time_id].data_type
        if data_type == TimePeriod:
            result.data = cls.fill_periods(result.data, fill_type)
        elif data_type == Date:
            frequencies = cls.get_frequencies(operand.data[cls.time_id].apply(cls.parse_date))
            result.data = cls.fill_dates(
                result.data, fill_type, cls.find_min_frequency(frequencies)
            )
        elif data_type == TimeInterval:
            frequencies = result.data[cls.time_id].apply(cls.get_frequency_from_time).unique()
            if len(frequencies) > 1:
                raise SemanticError(
                    "1-1-19-9",
                    op=cls.op,
                    comp_type="dataset",
                    param="single time interval frequency",
                )
            result.data = cls.fill_time_intervals(result.data, fill_type, frequencies[0])
        else:
            raise SemanticError("1-1-19-2", op=cls.op)
        return result

    @classmethod
    def validate(cls, operand: Dataset, fill_type: str) -> Dataset:
        dataset_name = VirtualCounter._new_ds_name()
        if not isinstance(operand, Dataset):
            raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
        cls.time_id = cls._get_time_id(operand)
        cls.other_ids = [id.name for id in operand.get_identifiers() if id.name != cls.time_id]
        cls.measures = operand.get_measures_names()
        if cls.time_id is None:
            raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
        if fill_type not in ["all", "single"]:
            fill_type = "all"
        return Dataset(name=dataset_name, components=operand.components.copy(), data=None)

    @classmethod
    def max_min_from_period(cls, data: pd.DataFrame, mode: str = "all") -> Dict[str, Any]:
        result_dict: Dict[Any, Any] = {}
        data = data.assign(
            Periods_col=data[cls.time_id].apply(cls._get_period),
            Periods_values_col=data[cls.time_id].apply(
                lambda x: int(re.sub(r"[^\d]", "", x.split("-")[-1]))
            ),
            Year_values_col=data[cls.time_id].apply(lambda x: int(x.split("-")[0])),
        ).sort_values(by=["Year_values_col", "Periods_col", "Periods_values_col"])

        if mode == "all":
            min_year = data["Year_values_col"].min()
            max_year = data["Year_values_col"].max()
            result_dict = {"min": {"A": min_year}, "max": {"A": max_year}}
            for period, group in data.groupby("Periods_col"):
                if period != "A":
                    result_dict["min"][period] = group["Periods_values_col"].min()
                    result_dict["max"][period] = group["Periods_values_col"].max()

        elif mode == "single":
            for name, group in data.groupby(cls.other_ids + ["Periods_col"]):
                key = name[:-1] if len(name[:-1]) > 1 else name[0]
                period = name[-1]
                if key not in result_dict:
                    result_dict[key] = {
                        "min": {"A": group["Year_values_col"].min()},
                        "max": {"A": group["Year_values_col"].max()},
                    }
                if period != "A":
                    year_min = group["Year_values_col"].min()
                    year_max = group["Year_values_col"].max()

                    result_dict[key]["min"]["A"] = min(result_dict[key]["min"]["A"], year_min)
                    result_dict[key]["max"]["A"] = max(result_dict[key]["max"]["A"], year_max)
                    result_dict[key]["min"][period] = group["Periods_values_col"].min()
                    result_dict[key]["max"][period] = group["Periods_values_col"].max()

        else:
            raise ValueError("Mode must be either 'all' or 'single'")
        return result_dict

    @classmethod
    def fill_periods(cls, data: pd.DataFrame, fill_type: str) -> pd.DataFrame:
        result_data = cls.period_filler(data, single=(fill_type != "all"))
        not_na = result_data[cls.measures].notna().any(axis=1)
        duplicated = result_data.duplicated(subset=(cls.other_ids + [cls.time_id]), keep=False)
        return result_data[~duplicated | not_na]

    @classmethod
    def period_filler(cls, data: pd.DataFrame, single: bool = False) -> pd.DataFrame:
        filled_data = []
        MAX_MIN = cls.max_min_from_period(data, mode="single" if single else "all")
        cls.periods = (
            list(MAX_MIN[list(MAX_MIN.keys())[0]]["min"].keys())
            if single
            else list(MAX_MIN["min"].keys())
        )
        groups = data.groupby(cls.other_ids)

        for group, group_df in groups:
            period_limits = (
                MAX_MIN if not single else MAX_MIN[group if len(group) > 1 else group[0]]
            )
            years = list(range(period_limits["min"]["A"], period_limits["max"]["A"] + 1))
            for period in cls.periods:
                if period == "A":
                    filled_data.extend(cls.fill_periods_rows(group_df, period, years))
                else:
                    if period in period_limits["min"] and period in period_limits["max"]:
                        vals = list(
                            range(
                                period_limits["min"][period],
                                period_limits["max"][period] + 1,
                            )
                        )
                        filled_data.extend(
                            cls.fill_periods_rows(group_df, period, years, vals=vals)
                        )

        filled_data = pd.concat(filled_data, ignore_index=True)
        combined_data = pd.concat([filled_data, data], ignore_index=True)
        if len(cls.periods) == 1 and cls.periods[0] == "A":
            combined_data[cls.time_id] = combined_data[cls.time_id].astype(int)
        else:
            combined_data[cls.time_id] = combined_data[cls.time_id].astype(str)
        return combined_data.sort_values(by=cls.other_ids + [cls.time_id])

    @classmethod
    def fill_periods_rows(
        cls,
        group_df: Any,
        period: str,
        years: List[int],
        vals: Optional[List[int]] = None,
    ) -> List[Any]:
        rows = []
        for year in years:
            if period == "A":
                rows.append(cls.create_period_row(group_df, period, year))
            elif vals is not None:
                for val in vals:
                    rows.append(cls.create_period_row(group_df, period, year, val=val))
        return rows

    @classmethod
    def create_period_row(
        cls, group_df: Any, period: str, year: int, val: Optional[int] = None
    ) -> Any:
        row = group_df.iloc[0].copy()
        row[cls.time_id] = f"{year}" if period == "A" else f"{year}-{period}{val:d}"
        row[cls.measures] = None
        return row.to_frame().T

    @classmethod
    def max_min_from_date(cls, data: pd.DataFrame, fill_type: str = "all") -> Dict[str, Any]:
        def compute_min_max(group: Any) -> Dict[str, Any]:
            min_date = cls.parse_date(group.min())
            max_date = cls.parse_date(group.max())
            date_format = cls.get_date_format(max_date)
            return {"min": min_date, "max": max_date, "date_format": date_format}

        if fill_type == "all":
            return compute_min_max(data[cls.time_id])

        grouped = data.groupby(cls.other_ids)
        result_dict = {
            name if len(name) > 1 else name[0]: compute_min_max(group[cls.time_id])
            for name, group in grouped
        }
        return result_dict

    @classmethod
    def fill_dates(cls, data: pd.DataFrame, fill_type: str, min_frequency: str) -> pd.DataFrame:
        result_data = cls.date_filler(data, fill_type, min_frequency)
        not_na = result_data[cls.measures].notna().any(axis=1)
        duplicated = result_data.duplicated(subset=(cls.other_ids + [cls.time_id]), keep=False)
        return result_data[~duplicated | not_na]

    @classmethod
    def date_filler(cls, data: pd.DataFrame, fill_type: str, min_frequency: str) -> pd.DataFrame:
        MAX_MIN = cls.max_min_from_date(data, fill_type)
        date_format = None
        filled_data = []

        def create_filled_dates(group: Any, min_max: Dict[str, Any]) -> (pd.DataFrame, str):  # type: ignore[syntax]
            date_range = pd.date_range(start=min_max["min"], end=min_max["max"], freq=min_frequency)
            date_df = pd.DataFrame(date_range, columns=[cls.time_id])
            date_df[cls.other_ids] = group.iloc[0][cls.other_ids]
            date_df[cls.measures] = None
            return date_df, min_max["date_format"]

        for name, group in data.groupby(cls.other_ids):
            min_max = MAX_MIN if fill_type == "all" else MAX_MIN[name if len(name) > 1 else name[0]]
            filled_dates, date_format = create_filled_dates(group, min_max)
            filled_data.append(filled_dates)

        filled_data = pd.concat(filled_data, ignore_index=True)
        filled_data[cls.time_id] = filled_data[cls.time_id].dt.strftime(date_format)
        combined_data = pd.concat([filled_data, data], ignore_index=True)
        combined_data[cls.time_id] = combined_data[cls.time_id].astype(str)
        return combined_data.sort_values(by=cls.other_ids + [cls.time_id])

    @classmethod
    def max_min_from_time(cls, data: pd.DataFrame, fill_type: str = "all") -> Dict[str, Any]:
        data = data.applymap(str).sort_values(  # type: ignore[operator]
            by=cls.other_ids + [cls.time_id]
        )

        def extract_max_min(group: Any) -> Dict[str, Any]:
            start_dates = group.apply(lambda x: x.split("/")[0])
            end_dates = group.apply(lambda x: x.split("/")[1])
            return {
                "start": {"min": start_dates.min(), "max": start_dates.max()},
                "end": {"min": end_dates.min(), "max": end_dates.max()},
            }

        if fill_type == "all":
            return extract_max_min(data[cls.time_id])
        else:
            return {
                name: extract_max_min(group[cls.time_id])
                for name, group in data.groupby(cls.other_ids)
            }

    @classmethod
    def fill_time_intervals(
        cls, data: pd.DataFrame, fill_type: str, frequency: str
    ) -> pd.DataFrame:
        result_data = cls.time_filler(data, fill_type, frequency)
        not_na = result_data[cls.measures].notna().any(axis=1)
        duplicated = result_data.duplicated(subset=(cls.other_ids + [cls.time_id]), keep=False)
        return result_data[~duplicated | not_na]

    @classmethod
    def time_filler(cls, data: pd.DataFrame, fill_type: str, frequency: str) -> pd.DataFrame:
        MAX_MIN = cls.max_min_from_time(data, fill_type)

        def fill_group(group_df: pd.DataFrame) -> pd.DataFrame:
            group_key = group_df.iloc[0][cls.other_ids].values
            if fill_type != "all":
                group_key = group_key[0] if len(group_key) == 1 else tuple(group_key)
            group_dict = MAX_MIN if fill_type == "all" else MAX_MIN[group_key]

            intervals = [
                f"{group_dict['start']['min']}/{group_dict['end']['min']}",
                f"{group_dict['start']['max']}/{group_dict['end']['max']}",
            ]
            for interval in intervals:
                if interval not in group_df[cls.time_id].values:
                    empty_row = group_df.iloc[0].copy()
                    empty_row[cls.time_id] = interval
                    empty_row[cls.measures] = None
                    group_df = group_df.append(  # type: ignore[operator]
                        empty_row, ignore_index=True
                    )
            start_group_df = group_df.copy()
            start_group_df[cls.time_id] = start_group_df[cls.time_id].apply(
                lambda x: x.split("/")[0]
            )
            end_group_df = group_df.copy()
            end_group_df[cls.time_id] = end_group_df[cls.time_id].apply(lambda x: x.split("/")[1])
            start_filled = cls.date_filler(start_group_df, fill_type, frequency)
            end_filled = cls.date_filler(end_group_df, fill_type, frequency)
            start_filled[cls.time_id] = start_filled[cls.time_id].str.cat(
                end_filled[cls.time_id], sep="/"
            )
            return start_filled

        filled_data = [fill_group(group_df) for _, group_df in data.groupby(cls.other_ids)]
        return (
            pd.concat(filled_data, ignore_index=True)
            .sort_values(by=cls.other_ids + [cls.time_id])
            .drop_duplicates()
        )
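
`fill_time_series` computes min/max limits either globally ("all") or per group of the non-time identifiers ("single"), generates placeholder rows with null measures over that range, and then drops the placeholders that would duplicate an existing row (the `~duplicated | not_na` mask keeps the original). A sketch, assuming `Dataset` accepts a DataFrame whose columns match its components:

# --- sketch: filling a quarterly gap (hypothetical data) ---
comps = { ... }  # as in the sketch after class Time: Id_1, TIME_PERIOD, Me_1
df = pd.DataFrame({"Id_1": ["A", "A"], "TIME_PERIOD": ["2020-Q1", "2020-Q3"], "Me_1": [1, 3]})
out = Fill_time_series.evaluate(Dataset(name="DS_1", components=comps, data=df), "all")
# out.data gains a 2020-Q2 row with Me_1 = null; Q1 and Q3 keep their values.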


class Time_Shift(Binary):
    op = TIMESHIFT

    @classmethod
    def evaluate(cls, operand: Dataset, shift_value: Any) -> Dataset:
        result = cls.validate(operand, shift_value)
        result.data = operand.data.copy() if operand.data is not None else pd.DataFrame()
        shift_value = int(shift_value.value)
        cls.time_id = cls._get_time_id(result)

        data_type: Any = result.components[cls.time_id].data_type

        if data_type == Date:
            freq = cls.find_min_frequency(
                cls.get_frequencies(
                    result.data[cls.time_id].map(cls.parse_date, na_action="ignore")
                )
            )
            result.data[cls.time_id] = cls.shift_dates(result.data[cls.time_id], shift_value, freq)
        elif data_type == Time:
            freq = cls.get_frequency_from_time(result.data[cls.time_id].iloc[0])
            result.data[cls.time_id] = result.data[cls.time_id].apply(
                lambda x: cls.shift_interval(x, shift_value, freq)
            )
        elif data_type == TimePeriod:
            periods = result.data[cls.time_id].apply(cls._get_period).unique()
            result.data[cls.time_id] = result.data[cls.time_id].apply(
                lambda x: cls.shift_period(x, shift_value)
            )
            if len(periods) == 1 and periods[0] == "A":
                result.data[cls.time_id] = result.data[cls.time_id].astype(int)
        else:
            raise SemanticError("1-1-19-2", op=cls.op)
        return result

    @classmethod
    def validate(cls, operand: Dataset, shift_value: str) -> Dataset:
        dataset_name = VirtualCounter._new_ds_name()
        if cls._get_time_id(operand) is None:
            raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
        return Dataset(name=dataset_name, components=operand.components.copy(), data=None)

    @classmethod
    def shift_dates(cls, dates: Any, shift_value: int, frequency: str) -> Any:
        dates = pd.to_datetime(dates)
        if frequency == "D":
            return dates + pd.to_timedelta(shift_value, unit="D")
        elif frequency == "W":
            return dates + pd.to_timedelta(shift_value, unit="W")
        elif frequency == "Y":
            return dates + pd.DateOffset(years=shift_value)
        elif frequency in ["M", "Q", "S"]:
            return dates + pd.DateOffset(months=shift_value)
        raise SemanticError("2-1-19-2", period=frequency)

    @classmethod
    def shift_period(
        cls, period_str: str, shift_value: int, frequency: Optional[int] = None
    ) -> str:
        period_type = cls._get_period(period_str)

        if period_type == "A":
            return str(int(period_str) + shift_value)

        if frequency:
            shift_value *= frequency

        tp_value = TimePeriodHandler(period_str)
        year, period, value = (
            tp_value.year,
            tp_value.period_indicator,
            tp_value.period_number + shift_value,
        )
        period_limit = cls.YEAR_TO_PERIOD[period]

        if value <= 0:
            year -= 1
            value += period_limit
        elif value > period_limit:
            year += (value - 1) // period_limit
            value = (value - 1) % period_limit + 1

        return f"{year}-{period}{value}"

    @classmethod
    def shift_interval(cls, interval: str, shift_value: Any, frequency: str) -> str:
        start_date, end_date = interval.split("/")
        start_date = cls.shift_dates(start_date, shift_value, frequency)
        end_date = cls.shift_dates(end_date, shift_value, frequency)
        return f"{start_date}/{end_date}"


class Time_Aggregation(Time):
    op = TIME_AGG

    @classmethod
    def _check_duration(cls, value: str) -> None:
        if value not in PERIOD_IND_MAPPING:
            raise SemanticError("1-1-19-3", op=cls.op, param="duration")

    @classmethod
    def _check_params(cls, period_from: Optional[str], period_to: str) -> None:
        cls._check_duration(period_to)
        if period_from is not None:
            cls._check_duration(period_from)
            if PERIOD_IND_MAPPING[period_to] <= PERIOD_IND_MAPPING[period_from]:
                # OPERATORS_TIMEOPERATORS.19
                raise SemanticError("1-1-19-4", op=cls.op, value_1=period_from, value_2=period_to)

    @classmethod
    def dataset_validation(
        cls, operand: Dataset, period_from: Optional[str], period_to: str, conf: str
    ) -> Dataset:
        # TODO: Review with VTL TF as this makes no sense

        count_time_types = 0
        for measure in operand.get_measures():
            if measure.data_type in cls.TIME_DATA_TYPES:
                count_time_types += 1
                if measure.data_type == TimePeriod and period_to == "D":
                    raise SemanticError("1-1-19-5", op=cls.op)
                if measure.data_type == TimeInterval:
                    raise SemanticError("1-1-19-6", op=cls.op, comp=measure.name)

        count_time_types = 0
        for id_ in operand.get_identifiers():
            if id_.data_type in cls.TIME_DATA_TYPES:
                count_time_types += 1
        if count_time_types != 1:
            raise SemanticError(
                "1-1-19-9",
                op=cls.op,
                comp_type="dataset",
                param="single time identifier",
            )

        if count_time_types != 1:
            raise SemanticError(
                "1-1-19-9", op=cls.op, comp_type="dataset", param="single time measure"
            )

        result_components = {
            comp.name: comp
            for comp in operand.components.values()
            if comp.role in [Role.IDENTIFIER, Role.MEASURE]
        }

        return Dataset(name=operand.name, components=result_components, data=None)

    @classmethod
    def component_validation(
        cls,
        operand: DataComponent,
        period_from: Optional[str],
        period_to: str,
        conf: str,
    ) -> DataComponent:
        if operand.data_type not in cls.TIME_DATA_TYPES:
            raise SemanticError("1-1-19-8", op=cls.op, comp_type="time component")
        if operand.data_type == TimePeriod and period_to == "D":
            raise SemanticError("1-1-19-5", op=cls.op)
        if operand.data_type == TimeInterval:
            raise SemanticError("1-1-19-6", op=cls.op, comp=operand.name)

        return DataComponent(
            name=operand.name, data_type=operand.data_type, data=None, nullable=operand.nullable
        )

    @classmethod
    def scalar_validation(
        cls, operand: Scalar, period_from: Optional[str], period_to: str, conf: str
    ) -> Scalar:
        if operand.data_type not in cls.TIME_DATA_TYPES:
            raise SemanticError("1-1-19-8", op=cls.op, comp_type="time scalar")

        return Scalar(name=operand.name, data_type=operand.data_type, value=None)

    @classmethod
    def _execute_time_aggregation(
        cls,
        value: str,
        data_type: Type[ScalarType],
        period_from: Optional[str],
        period_to: str,
        conf: str,
    ) -> str:
        if data_type == TimePeriod:  # Time period
            return _time_period_access(value, period_to)

        elif data_type == Date:
            start = conf == "first"
            # Date
            if period_to == "D":
                return value
            return _date_access(value, period_to, start)
        else:
            raise NotImplementedError

    @classmethod
    def dataset_evaluation(
        cls, operand: Dataset, period_from: Optional[str], period_to: str, conf: str
    ) -> Dataset:
        result = cls.dataset_validation(operand, period_from, period_to, conf)
        result.data = operand.data.copy() if operand.data is not None else pd.DataFrame()
        time_measure = [m for m in operand.get_measures() if m.data_type in cls.TIME_DATA_TYPES][0]
        result.data[time_measure.name] = result.data[time_measure.name].map(
            lambda x: cls._execute_time_aggregation(
                x, time_measure.data_type, period_from, period_to, conf
            ),
            na_action="ignore",
        )

        return result

    @classmethod
    def component_evaluation(
        cls,
        operand: DataComponent,
        period_from: Optional[str],
        period_to: str,
        conf: str,
    ) -> DataComponent:
        result = cls.component_validation(operand, period_from, period_to, conf)
        if operand.data is not None:
            result.data = operand.data.map(
                lambda x: cls._execute_time_aggregation(
                    x, operand.data_type, period_from, period_to, conf
                ),
                na_action="ignore",
            )
        return result

    @classmethod
    def scalar_evaluation(
        cls, operand: Scalar, period_from: Optional[str], period_to: str, conf: str
    ) -> Scalar:
        result = cls.scalar_validation(operand, period_from, period_to, conf)
        result.value = cls._execute_time_aggregation(
            operand.value, operand.data_type, period_from, period_to, conf
        )
        return result

    @classmethod
    def validate(
        cls,
        operand: Union[Dataset, DataComponent, Scalar],
        period_from: Optional[str],
        period_to: str,
        conf: str,
    ) -> Union[Dataset, DataComponent, Scalar]:
        cls._check_params(period_from, period_to)
        if isinstance(operand, Dataset):
            return cls.dataset_validation(operand, period_from, period_to, conf)
        elif isinstance(operand, DataComponent):
            return cls.component_validation(operand, period_from, period_to, conf)
        else:
            return cls.scalar_validation(operand, period_from, period_to, conf)

    @classmethod
    def evaluate(
        cls,
        operand: Union[Dataset, DataComponent, Scalar],
        period_from: Optional[str],
        period_to: str,
        conf: str,
    ) -> Union[Dataset, DataComponent, Scalar]:
        cls._check_params(period_from, period_to)
        if isinstance(operand, Dataset):
            return cls.dataset_evaluation(operand, period_from, period_to, conf)
        elif isinstance(operand, DataComponent):
            return cls.component_evaluation(operand, period_from, period_to, conf)
        else:
            return cls.scalar_evaluation(operand, period_from, period_to, conf)


def _time_period_access(v: Any, to_param: str) -> Any:
    v = TimePeriodHandler(v)
    if v.period_indicator == to_param:
        return str(v)
    v.change_indicator(to_param)
    return str(v)


def _date_access(v: str, to_param: str, start: bool) -> Any:
    period_value = date_to_period(date.fromisoformat(v), to_param)
    if start:
        return period_value.start_date()
    return period_value.end_date()
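
`time_agg` therefore coarsens values rather than aggregating measures: a `TimePeriod` just changes its indicator, while a `Date` snaps to the first or last day of the enclosing period depending on `conf`. Indicative values, assuming `date_to_period` and `TimePeriodHandler.change_indicator` behave as their names suggest:

# --- sketch: coarsening a single value ---
_time_period_access("2022-M07", "Q")          # -> "2022-Q3"
_date_access("2022-07-15", "A", start=True)   # -> 2022-01-01 (first day of the year)
_date_access("2022-07-15", "Q", start=False)  # -> 2022-09-30 (last day of 2022-Q3)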


class Current_Date(Time):
    @classmethod
    def validate(cls) -> Scalar:
        return Scalar(name="current_date", data_type=Date, value=None)

    @classmethod
    def evaluate(cls) -> Scalar:
        result = cls.validate()
        result.value = date.today().isoformat()
        return result


class SimpleBinaryTime(Operators.Binary):
    @classmethod
    def validate_type_compatibility(cls, left: Any, right: Any) -> bool:
        if left == Date and right == TimePeriod:
            return False

        if left == TimePeriod and right == Date:
            return False

        return not (left == TimePeriod and right == Date)

    @classmethod
    def validate(
        cls,
        left_operand: Union[Dataset, DataComponent, Scalar],
        right_operand: Union[Dataset, DataComponent, Scalar],
    ) -> Union[Dataset, DataComponent, Scalar]:
        if isinstance(left_operand, Dataset) or isinstance(right_operand, Dataset):
            raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
        if not cls.validate_type_compatibility(left_operand.data_type, right_operand.data_type):
            raise SemanticError(
                "1-1-1-2",
                type_1=left_operand.data_type,
                type_2=right_operand.data_type,
                type_check=cls.type_to_check,
            )
        return super().validate(left_operand, right_operand)

    @classmethod
    def evaluate(
        cls,
        left_operand: Union[Dataset, DataComponent, Scalar],
        right_operand: Union[Dataset, DataComponent, Scalar],
    ) -> Union[Dataset, DataComponent, Scalar]:
        if isinstance(left_operand, Dataset) or isinstance(right_operand, Dataset):
            raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
        else:
            cls.validate(left_operand, right_operand)
        return super().evaluate(left_operand, right_operand)


class Date_Diff(SimpleBinaryTime):
    op = DATEDIFF
    type_to_check = TimeInterval
    return_type = Integer

    @classmethod
    def py_op(cls, x: Any, y: Any) -> int:
        if (x.count("/") >= 1) or (y.count("/") >= 1):
            raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")

        if x.count("-") == 2:
            fecha1 = datetime.strptime(x, "%Y-%m-%d").date()
        else:
            fecha1 = TimePeriodHandler(x).end_date(as_date=True)  # type: ignore[assignment]

        if y.count("-") == 2:
            fecha2 = datetime.strptime(y, "%Y-%m-%d").date()
        else:
            fecha2 = TimePeriodHandler(y).end_date(as_date=True)  # type: ignore[assignment]

        return abs((fecha2 - fecha1).days)
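
`datediff` normalises both operands to a date (a `TimePeriod` contributes its end date) and returns the absolute day count; explicit intervals (anything containing `/`) are rejected. Worked values:

# --- sketch: day differences ---
Date_Diff.py_op("2022-01-01", "2022-03-01")  # -> 59 (31 days of January + 28 of February)
Date_Diff.py_op("2022-Q1", "2022-Q2")        # -> 91, assuming TimePeriodHandler's end dates
                                             #    are 2022-03-31 and 2022-06-30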


class Date_Add(Parametrized):
    op = DATE_ADD

    @classmethod
    def validate(
        cls, operand: Union[Scalar, DataComponent, Dataset], param_list: List[Scalar]
    ) -> Union[Scalar, DataComponent, Dataset]:
        dataset_name = VirtualCounter._new_ds_name()
        expected_types = [Integer, String]
        for i, param in enumerate(param_list):
            error = (
                12
                if not isinstance(param, Scalar)  # type: ignore[redundant-expr]
                else 13
                if (param.data_type != expected_types[i])
                else None
            )
            if error is not None:
                raise SemanticError(
                    f"2-1-19-{error}",
                    op=cls.op,
                    type=(param.__class__.__name__ if error == 12 else param.data_type.__name__),
                    name="shiftNumber" if error == 12 else "periodInd",
                    expected="Scalar" if error == 12 else expected_types[i].__name__,
                )

        if isinstance(operand, (Scalar, DataComponent)) and operand.data_type not in [
            Date,
            TimePeriod,
        ]:
            unary_implicit_promotion(operand.data_type, Date)

        if isinstance(operand, Scalar):
            return Scalar(name=operand.name, data_type=operand.data_type, value=None)
        if isinstance(operand, DataComponent):
            return DataComponent(
                name=operand.name, data_type=operand.data_type, data=None, nullable=operand.nullable
            )

        if all(comp.data_type not in [Date, TimePeriod] for comp in operand.components.values()):
            raise SemanticError("2-1-19-14", op=cls.op, name=operand.name)
        return Dataset(name=dataset_name, components=operand.components.copy(), data=None)

    @classmethod
    def evaluate(
        cls, operand: Union[Scalar, DataComponent, Dataset], param_list: List[Scalar]
    ) -> Union[Scalar, DataComponent, Dataset]:
        result = cls.validate(operand, param_list)
        shift, period = param_list[0].value, param_list[1].value
        is_tp = isinstance(operand, (Scalar, DataComponent)) and operand.data_type == TimePeriod

        if isinstance(result, Scalar) and isinstance(operand, Scalar) and operand.value is not None:
            result.value = cls.py_op(operand.value, shift, period, is_tp)
        elif (
            isinstance(result, DataComponent)
            and isinstance(operand, DataComponent)
            and operand.data is not None
        ):
            result.data = operand.data.map(
                lambda x: cls.py_op(x, shift, period, is_tp), na_action="ignore"
            )
        elif (
            isinstance(result, Dataset)
            and isinstance(operand, Dataset)
            and operand.data is not None
        ):
            result.data = operand.data.copy()
            for measure in operand.get_measures():
                if measure.data_type in [Date, TimePeriod]:
                    result.data[measure.name] = result.data[measure.name].map(
                        lambda x: cls.py_op(str(x), shift, period, measure.data_type == TimePeriod),
                        na_action="ignore",
                    )
                    measure.data_type = Date

        if isinstance(result, (Scalar, DataComponent)):
            result.data_type = Date
        return result

    @classmethod
    def py_op(cls, date_str: str, shift: int, period: str, is_tp: bool = False) -> str:
        if is_tp:
            tp_value = TimePeriodHandler(date_str)
            date = period_to_date(tp_value.year, tp_value.period_indicator, tp_value.period_number)
        else:
            date = datetime.strptime(date_str, "%Y-%m-%d")

        if period in ["D", "W"]:
            days_shift = shift * (7 if period == "W" else 1)
            return (date + timedelta(days=days_shift)).strftime("%Y-%m-%d")

        month_shift = {"M": 1, "Q": 3, "S": 6, "A": 12}[period] * shift
        new_year = date.year + (date.month - 1 + month_shift) // 12
        new_month = (date.month - 1 + month_shift) % 12 + 1
        last_day = (datetime(new_year, new_month % 12 + 1, 1) - timedelta(days=1)).day
        return date.replace(year=new_year, month=new_month, day=min(date.day, last_day)).strftime(
            "%Y-%m-%d"
        )
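
`date_add` shifts by whole days or weeks directly, and by months for `M`/`Q`/`S`/`A`, clamping the day to the length of the target month. Worked values from `py_op`:

# --- sketch: calendar-aware shifting ---
Date_Add.py_op("2020-01-31", 1, "M")  # -> "2020-02-29" (day clamped to leap February)
Date_Add.py_op("2021-11-15", 1, "Q")  # -> "2022-02-15" (one quarter = 3 months, year carried)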


class SimpleUnaryTime(Operators.Unary):
    @classmethod
    def validate(
        cls, operand: Union[Dataset, DataComponent, Scalar]
    ) -> Union[Dataset, DataComponent, Scalar]:
        if isinstance(operand, Dataset):
            raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")

        # Limit the operand to Date and TimePeriod (cannot be implemented with type_to_check)
        if operand.data_type == TimeInterval or operand.data_type not in (
            Date,
            TimePeriod,
            Duration,
        ):
            raise SemanticError("1-1-19-10", op=cls.op)

        return super().validate(operand)

    @classmethod
    def evaluate(
        cls, operand: Union[Dataset, DataComponent, Scalar]
    ) -> Union[Dataset, DataComponent, Scalar]:
        cls.validate(operand)
        return super().evaluate(operand)


class Year(SimpleUnaryTime):
    op = YEAR

    @classmethod
    def py_op(cls, value: str) -> int:
        return int(value[:4])

    return_type = Integer


class Month(SimpleUnaryTime):
    op = MONTH
    return_type = Integer

    @classmethod
    def py_op(cls, value: str) -> int:
        if value.count("-") == 2:
            return date.fromisoformat(value).month

        result = TimePeriodHandler(value).start_date(as_date=True)
        return result.month  # type: ignore[union-attr]


class Day_of_Month(SimpleUnaryTime):
    op = DAYOFMONTH
    return_type = Integer

    @classmethod
    def py_op(cls, value: str) -> int:
        if value.count("-") == 2:
            return date.fromisoformat(value).day

        result = TimePeriodHandler(value).end_date(as_date=True)
        return result.day  # type: ignore[union-attr]


class Day_of_Year(SimpleUnaryTime):
    op = DAYOFYEAR
    return_type = Integer

    @classmethod
    def py_op(cls, value: str) -> int:
        if value.count("-") == 2:
            day_y = datetime.strptime(value, "%Y-%m-%d")
            return day_y.timetuple().tm_yday

        result = TimePeriodHandler(value).end_date(as_date=True)
        datetime_value = datetime(
            year=result.year,  # type: ignore[union-attr]
            month=result.month,  # type: ignore[union-attr]
            day=result.day,  # type: ignore[union-attr]
        )
        return datetime_value.timetuple().tm_yday
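
The four extractors accept either an ISO date (two hyphens) or a `TimePeriod` string; periods are resolved through `TimePeriodHandler`, using the start date for `month` and the end date for the day-based extractors. Indicative values (the period resolutions assume the usual quarter and month boundaries):

# --- sketch: date-part extraction ---
Year.py_op("2022-07-15")         # -> 2022 (first four characters, so periods work too)
Month.py_op("2022-Q4")           # -> 10  (start of Q4 is 2022-10-01)
Day_of_Month.py_op("2022-M2")    # -> 28  (end of February 2022)
Day_of_Year.py_op("2022-03-01")  # -> 60  (31 + 28 + 1)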


class Day_to_Year(Operators.Unary):
    op = DAYTOYEAR
    return_type = Duration

    @classmethod
    def py_op(cls, value: int) -> str:
        if value < 0:
            raise RunTimeError("2-1-19-16", op=cls.op)
        years = 0
        days_remaining = value
        if value >= 365:
            years = value // 365
            days_remaining = value % 365
        return f"P{int(years)}Y{int(days_remaining)}D"


class Day_to_Month(Operators.Unary):
    op = DAYTOMONTH
    return_type = Duration

    @classmethod
    def py_op(cls, value: int) -> str:
        if value < 0:
            raise RunTimeError("2-1-19-16", op=cls.op)
        months = 0
        days_remaining = value
        if value >= 30:
            months = value // 30
            days_remaining = value % 30
        return f"P{int(months)}M{int(days_remaining)}D"


class Year_to_Day(Operators.Unary):
    op = YEARTODAY
    return_type = Integer

    @classmethod
    def py_op(cls, value: str) -> int:
        days = Duration.to_days(value)
        return days


class Month_to_Day(Operators.Unary):
    op = MONTHTODAY
    return_type = Integer

    @classmethod
    def py_op(cls, value: str) -> int:
        days = Duration.to_days(value)
        return days