vtlengine-1.4.0rc2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. vtlengine/API/_InternalApi.py +791 -0
  2. vtlengine/API/__init__.py +612 -0
  3. vtlengine/API/data/schema/external_routines_schema.json +34 -0
  4. vtlengine/API/data/schema/json_schema_2.1.json +116 -0
  5. vtlengine/API/data/schema/value_domain_schema.json +97 -0
  6. vtlengine/AST/ASTComment.py +57 -0
  7. vtlengine/AST/ASTConstructor.py +598 -0
  8. vtlengine/AST/ASTConstructorModules/Expr.py +1928 -0
  9. vtlengine/AST/ASTConstructorModules/ExprComponents.py +995 -0
  10. vtlengine/AST/ASTConstructorModules/Terminals.py +790 -0
  11. vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
  12. vtlengine/AST/ASTDataExchange.py +10 -0
  13. vtlengine/AST/ASTEncoders.py +32 -0
  14. vtlengine/AST/ASTString.py +675 -0
  15. vtlengine/AST/ASTTemplate.py +558 -0
  16. vtlengine/AST/ASTVisitor.py +25 -0
  17. vtlengine/AST/DAG/__init__.py +479 -0
  18. vtlengine/AST/DAG/_words.py +10 -0
  19. vtlengine/AST/Grammar/Vtl.g4 +705 -0
  20. vtlengine/AST/Grammar/VtlTokens.g4 +409 -0
  21. vtlengine/AST/Grammar/__init__.py +0 -0
  22. vtlengine/AST/Grammar/lexer.py +2139 -0
  23. vtlengine/AST/Grammar/parser.py +16597 -0
  24. vtlengine/AST/Grammar/tokens.py +169 -0
  25. vtlengine/AST/VtlVisitor.py +824 -0
  26. vtlengine/AST/__init__.py +674 -0
  27. vtlengine/DataTypes/TimeHandling.py +562 -0
  28. vtlengine/DataTypes/__init__.py +863 -0
  29. vtlengine/DataTypes/_time_checking.py +135 -0
  30. vtlengine/Exceptions/__exception_file_generator.py +96 -0
  31. vtlengine/Exceptions/__init__.py +159 -0
  32. vtlengine/Exceptions/messages.py +1004 -0
  33. vtlengine/Interpreter/__init__.py +2048 -0
  34. vtlengine/Model/__init__.py +501 -0
  35. vtlengine/Operators/Aggregation.py +357 -0
  36. vtlengine/Operators/Analytic.py +455 -0
  37. vtlengine/Operators/Assignment.py +23 -0
  38. vtlengine/Operators/Boolean.py +106 -0
  39. vtlengine/Operators/CastOperator.py +451 -0
  40. vtlengine/Operators/Clause.py +366 -0
  41. vtlengine/Operators/Comparison.py +488 -0
  42. vtlengine/Operators/Conditional.py +495 -0
  43. vtlengine/Operators/General.py +191 -0
  44. vtlengine/Operators/HROperators.py +254 -0
  45. vtlengine/Operators/Join.py +447 -0
  46. vtlengine/Operators/Numeric.py +422 -0
  47. vtlengine/Operators/RoleSetter.py +77 -0
  48. vtlengine/Operators/Set.py +176 -0
  49. vtlengine/Operators/String.py +578 -0
  50. vtlengine/Operators/Time.py +1144 -0
  51. vtlengine/Operators/Validation.py +275 -0
  52. vtlengine/Operators/__init__.py +900 -0
  53. vtlengine/Utils/__Virtual_Assets.py +34 -0
  54. vtlengine/Utils/__init__.py +479 -0
  55. vtlengine/__extras_check.py +17 -0
  56. vtlengine/__init__.py +27 -0
  57. vtlengine/files/__init__.py +0 -0
  58. vtlengine/files/output/__init__.py +35 -0
  59. vtlengine/files/output/_time_period_representation.py +55 -0
  60. vtlengine/files/parser/__init__.py +240 -0
  61. vtlengine/files/parser/_rfc_dialect.py +22 -0
  62. vtlengine/py.typed +0 -0
  63. vtlengine-1.4.0rc2.dist-info/METADATA +89 -0
  64. vtlengine-1.4.0rc2.dist-info/RECORD +66 -0
  65. vtlengine-1.4.0rc2.dist-info/WHEEL +4 -0
  66. vtlengine-1.4.0rc2.dist-info/licenses/LICENSE.md +661 -0
vtlengine/Operators/Time.py
@@ -0,0 +1,1144 @@
+ import re
+ from datetime import date, datetime, timedelta
+ from typing import Any, Dict, List, Optional, Tuple, Type, Union
+
+ import pandas as pd
+
+ import vtlengine.Operators as Operators
+ from vtlengine.AST.Grammar.tokens import (
+     DATE_ADD,
+     DATEDIFF,
+     DAYOFMONTH,
+     DAYOFYEAR,
+     DAYTOMONTH,
+     DAYTOYEAR,
+     FILL_TIME_SERIES,
+     FLOW_TO_STOCK,
+     MONTH,
+     MONTHTODAY,
+     PERIOD_INDICATOR,
+     TIME_AGG,
+     TIMESHIFT,
+     YEAR,
+     YEARTODAY,
+ )
+ from vtlengine.DataTypes import (
+     Date,
+     Duration,
+     Integer,
+     ScalarType,
+     String,
+     TimeInterval,
+     TimePeriod,
+     unary_implicit_promotion,
+ )
+ from vtlengine.DataTypes.TimeHandling import (
+     PERIOD_IND_MAPPING,
+     TimePeriodHandler,
+     date_to_period,
+     period_to_date,
+ )
+ from vtlengine.Exceptions import RunTimeError, SemanticError
+ from vtlengine.Model import Component, DataComponent, Dataset, Role, Scalar
+ from vtlengine.Utils.__Virtual_Assets import VirtualCounter
+
+
+ class Time(Operators.Operator):
+     periods: Any
+     time_id: Any
+     other_ids: Any
+     measures: Any
+
+     TIME_DATA_TYPES = [Date, TimePeriod, TimeInterval]
+
+     FREQUENCY_MAP = {"Y": "years", "M": "months", "D": "days"}
+     YEAR_TO_PERIOD = {"S": 2, "Q": 4, "M": 12, "W": 52, "D": 365}
+     PERIOD_ORDER = {"A": 0, "S": 1, "Q": 2, "M": 3, "W": 4, "D": 5}
+
+     op = FLOW_TO_STOCK
+
+     @classmethod
+     def _get_time_id(cls, operand: Dataset) -> str:
+         reference_id = None
+         identifiers = operand.get_identifiers()
+         if len(identifiers) == 0:
+             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
+         for id_ in identifiers:
+             if id_.data_type in cls.TIME_DATA_TYPES:
+                 if reference_id is not None:
+                     raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
+                 reference_id = id_.name
+         if reference_id is None:
+             raise SemanticError("1-1-19-1", op=cls.op, data_type="Time_Period", comp="identifier")
+         return str(reference_id)
+
+     @classmethod
+     def sort_by_time(cls, operand: Dataset) -> Optional[pd.DataFrame]:
+         time_id = cls._get_time_id(operand)
+         if time_id is None:
+             return None
+         ids = [id_.name for id_ in operand.get_identifiers() if id_.name != time_id]
+         ids.append(time_id)
+         if operand.data is None:
+             return None
+         return operand.data.sort_values(by=ids).reset_index(drop=True)
+
+     @classmethod
+     def _get_period(cls, value: str) -> str:
+         tp_value = TimePeriodHandler(value)
+         return tp_value.period_indicator
+
+     @classmethod
+     def parse_date(cls, date_str: str) -> date:
+         return date.fromisoformat(date_str)
+
+     @classmethod
+     def get_frequencies(cls, dates: Any) -> Any:
+         dates = pd.to_datetime(dates)
+         dates = dates.sort_values()
+         deltas = dates.diff().dropna()
+         return deltas
+
+     @classmethod
+     def find_min_frequency(cls, differences: Any) -> str:
+         months_deltas = differences.apply(lambda x: x.days // 30)
+         days_deltas = differences.apply(lambda x: x.days)
+         min_months = min(
+             (diff for diff in months_deltas if diff > 0 and diff % 12 != 0),
+             default=None,
+         )
+         min_days = min(
+             (diff for diff in days_deltas if diff > 0 and diff % 365 != 0 and diff % 366 != 0),
+             default=None,
+         )
+         return "D" if min_days else "M" if min_months else "Y"
+
+     @classmethod
+     def get_frequency_from_time(cls, interval: str) -> Any:
+         start_date, end_date = interval.split("/")
+         return date.fromisoformat(end_date) - date.fromisoformat(start_date)
+
+     @classmethod
+     def get_date_format(cls, date_str: Union[str, date]) -> str:
+         date_value = cls.parse_date(date_str) if isinstance(date_str, str) else date_str
+         return "%Y-%m-%d" if date_value.day >= 1 else "%Y-%m" if date_value.month >= 1 else "%Y"
+
+
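The frequency helpers above drive gap filling and shifting: get_frequencies turns a date column into successive deltas, and find_min_frequency classifies the smallest meaningful delta as daily, monthly, or yearly. A minimal standalone sketch of that heuristic (pandas only, independent of the vtlengine classes):

    import pandas as pd

    dates = pd.Series(pd.to_datetime(["2020-01-01", "2020-02-01", "2020-03-01", "2020-03-02"]))
    deltas = dates.sort_values().diff().dropna()

    month_deltas = deltas.apply(lambda d: d.days // 30)
    day_deltas = deltas.apply(lambda d: d.days)
    min_months = min((m for m in month_deltas if m > 0 and m % 12 != 0), default=None)
    min_days = min((d for d in day_deltas if d > 0 and d % 365 != 0 and d % 366 != 0), default=None)

    print("D" if min_days else "M" if min_months else "Y")  # -> "D": the one-day gap wins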
+ class Unary(Time):
+     @classmethod
+     def validate(cls, operand: Any) -> Any:
+         dataset_name = VirtualCounter._new_ds_name()
+         if not isinstance(operand, Dataset):
+             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
+         if cls._get_time_id(operand) is None:
+             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
+         operand.data = cls.sort_by_time(operand)
+         return Dataset(name=dataset_name, components=operand.components.copy(), data=None)
+
+     @classmethod
+     def evaluate(cls, operand: Any) -> Any:
+         result = cls.validate(operand)
+         result.data = operand.data.copy() if operand.data is not None else pd.DataFrame()
+         if len(result.data) < 2:
+             return result
+
+         cls.time_id = cls._get_time_id(result)
+         cls.other_ids = [id_.name for id_ in result.get_identifiers() if id_.name != cls.time_id]
+         measure_names = result.get_measures_names()
+
+         data_type = result.components[cls.time_id].data_type
+
+         result.data = result.data.sort_values(by=cls.other_ids + [cls.time_id])
+         if data_type == TimePeriod:
+             result.data = cls._period_accumulation(result.data, measure_names)
+         elif data_type in (Date, TimeInterval):
+             result.data[measure_names] = (
+                 result.data.groupby(cls.other_ids)[measure_names]
+                 .apply(cls.py_op)
+                 .reset_index(drop=True)
+             )
+         else:
+             raise SemanticError("1-1-19-8", op=cls.op, comp_type="dataset", param="date type")
+         return result
+
+     @classmethod
+     def _period_accumulation(cls, data: pd.DataFrame, measure_names: List[str]) -> pd.DataFrame:
+         data = data.copy()
+         data["Period_group_col"] = (
+             data[cls.time_id].apply(cls._get_period).apply(lambda x: cls.PERIOD_ORDER[x])
+         )
+         result = data.groupby(cls.other_ids + ["Period_group_col"], group_keys=False)[
+             measure_names
+         ].apply(cls.py_op)
+         data[measure_names] = result.reset_index(drop=True)
+         return data.drop(columns="Period_group_col")
+
+
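Unary.evaluate sorts by the non-time identifiers plus the time identifier and applies py_op per series; the TimePeriod branch additionally groups by period granularity so annual and quarterly rows accumulate separately. A standalone sketch of the per-series accumulation pattern, using transform for clarity:

    import pandas as pd

    df = pd.DataFrame(
        {
            "series_id": ["A", "A", "A", "B", "B"],
            "ref_date": ["2020-01", "2020-02", "2020-03", "2020-01", "2020-02"],
            "obs": [1.0, 2.0, None, 10.0, 5.0],
        }
    ).sort_values(["series_id", "ref_date"])

    # flow_to_stock-style accumulation: cumulative sum per series, NaNs preserved
    df["obs"] = df.groupby("series_id")["obs"].transform(lambda x: x.cumsum().fillna(x))
    print(df)  # A: 1.0, 3.0, NaN; B: 10.0, 15.0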
+ class Binary(Time):
+     pass
+
+
+ class Parameterized(Time):
+     pass
+
+
+ class Period_indicator(Unary):
+     op = PERIOD_INDICATOR
+
+     @classmethod
+     def validate(cls, operand: Any) -> Any:
+         dataset_name = VirtualCounter._new_ds_name()
+         if isinstance(operand, Dataset):
+             time_id = cls._get_time_id(operand)
+             if operand.components[time_id].data_type != TimePeriod:
+                 raise SemanticError("1-1-19-8", op=cls.op, comp_type="time period dataset")
+             result_components = {
+                 comp.name: comp
+                 for comp in operand.components.values()
+                 if comp.role == Role.IDENTIFIER
+             }
+             result_components["duration_var"] = Component(
+                 name="duration_var",
+                 data_type=Duration,
+                 role=Role.MEASURE,
+                 nullable=True,
+             )
+             return Dataset(name=dataset_name, components=result_components, data=None)
+         # DataComponent and Scalar validation
+         if operand.data_type != TimePeriod:
+             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time period component")
+         if isinstance(operand, DataComponent):
+             return DataComponent(
+                 name=operand.name, data_type=Duration, data=None, nullable=operand.nullable
+             )
+         return Scalar(name=operand.name, data_type=Duration, value=None)
+
+     @classmethod
+     def evaluate(
+         cls, operand: Union[Dataset, DataComponent, Scalar, str]
+     ) -> Union[Dataset, DataComponent, Scalar, str]:
+         if isinstance(operand, str):
+             return cls._get_period(operand)
+         result = cls.validate(operand)
+         if isinstance(operand, Scalar):
+             result.value = cls._get_period(str(operand.value))
+             return result
+         if isinstance(operand, DataComponent):
+             if operand.data is not None:
+                 result.data = operand.data.map(cls._get_period, na_action="ignore")
+             return result
+         cls.time_id = cls._get_time_id(operand)
+         result.data = (
+             operand.data.copy()[result.get_identifiers_names()]
+             if (operand.data is not None)
+             else pd.DataFrame()
+         )
+         period_series: Any = result.data[cls.time_id].map(cls._get_period)
+         result.data["duration_var"] = period_series
+         return result
+
+
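period_indicator maps a VTL time-period value to its one-letter indicator, defaulting to "A" (annual) for bare years. The parsing itself lives in TimePeriodHandler; the regex stand-in below only illustrates the expected mapping (the formats TimePeriodHandler accepts may be broader):

    import re

    def period_indicator(value: str) -> str:
        # "2020" -> "A", "2020-Q1" -> "Q", "2020-M03" -> "M", ...
        match = re.fullmatch(r"\d{4}(?:-([SQMWD])\d+)?", value)
        if match is None:
            raise ValueError(f"not a time period: {value}")
        return match.group(1) or "A"

    assert period_indicator("2020") == "A"
    assert period_indicator("2020-Q4") == "Q"
    assert period_indicator("2020-M12") == "M"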
+ class Parametrized(Time):
+     @classmethod
+     def validate(cls, operand: Any, param: Any) -> Any:
+         pass
+
+     @classmethod
+     def evaluate(cls, operand: Any, param: Any) -> Any:
+         pass
+
+
+ class Flow_to_stock(Unary):
+     @classmethod
+     def py_op(cls, x: Any) -> Any:
+         return x.cumsum().fillna(x)
+
+
+ class Stock_to_flow(Unary):
+     @classmethod
+     def py_op(cls, x: Any) -> Any:
+         return x.diff().fillna(x)
+
+
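flow_to_stock and stock_to_flow are inverse transformations: cumsum turns flows into stocks, diff turns them back, and fillna(x) restores the first observation that diff would otherwise drop. A quick standalone check:

    import pandas as pd

    flows = pd.Series([3.0, 1.0, 4.0, 1.0])
    stocks = flows.cumsum().fillna(flows)       # flow_to_stock: 3, 4, 8, 9
    recovered = stocks.diff().fillna(stocks)    # stock_to_flow: 3, 1, 4, 1
    assert recovered.equals(flows)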
+ class Fill_time_series(Binary):
+     op = FILL_TIME_SERIES
+
+     @classmethod
+     def evaluate(cls, operand: Dataset, fill_type: str) -> Dataset:
+         result = cls.validate(operand, fill_type)
+         if operand.data is None:
+             operand.data = pd.DataFrame()
+         result.data = operand.data.copy()
+         if len(result.data) == 0:
+             return result
+         result.data[cls.time_id] = result.data[cls.time_id].astype(str)
+         if len(result.data) < 2:
+             return result
+         data_type = result.components[cls.time_id].data_type
+         if data_type == TimePeriod:
+             result.data = cls.fill_periods(result.data, fill_type)
+         elif data_type == Date:
+             frequencies = cls.get_frequencies(operand.data[cls.time_id].apply(cls.parse_date))
+             result.data = cls.fill_dates(
+                 result.data, fill_type, cls.find_min_frequency(frequencies)
+             )
+         elif data_type == TimeInterval:
+             frequencies = result.data[cls.time_id].apply(cls.get_frequency_from_time).unique()
+             if len(frequencies) > 1:
+                 raise SemanticError(
+                     "1-1-19-9",
+                     op=cls.op,
+                     comp_type="dataset",
+                     param="single time interval frequency",
+                 )
+             result.data = cls.fill_time_intervals(result.data, fill_type, frequencies[0])
+         else:
+             raise SemanticError("1-1-19-2", op=cls.op)
+         return result
+
+     @classmethod
+     def validate(cls, operand: Dataset, fill_type: str) -> Dataset:
+         dataset_name = VirtualCounter._new_ds_name()
+         if not isinstance(operand, Dataset):
+             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
+         cls.time_id = cls._get_time_id(operand)
+         cls.other_ids = [id_.name for id_ in operand.get_identifiers() if id_.name != cls.time_id]
+         cls.measures = operand.get_measures_names()
+         if cls.time_id is None:
+             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
+         if fill_type not in ["all", "single"]:
+             fill_type = "all"
+         return Dataset(name=dataset_name, components=operand.components.copy(), data=None)
+
+     @classmethod
+     def max_min_from_period(cls, data: pd.DataFrame, mode: str = "all") -> Dict[str, Any]:
+         result_dict: Dict[Any, Any] = {}
+         data = data.assign(
+             Periods_col=data[cls.time_id].apply(cls._get_period),
+             Periods_values_col=data[cls.time_id].apply(
+                 lambda x: int(re.sub(r"[^\d]", "", x.split("-")[-1]))
+             ),
+             Year_values_col=data[cls.time_id].apply(lambda x: int(x.split("-")[0])),
+         ).sort_values(by=["Year_values_col", "Periods_col", "Periods_values_col"])
+
+         if mode == "all":
+             min_year = data["Year_values_col"].min()
+             max_year = data["Year_values_col"].max()
+             result_dict = {"min": {"A": min_year}, "max": {"A": max_year}}
+             for period, group in data.groupby("Periods_col"):
+                 if period != "A":
+                     result_dict["min"][period] = group["Periods_values_col"].min()
+                     result_dict["max"][period] = group["Periods_values_col"].max()
+
+         elif mode == "single":
+             for name, group in data.groupby(cls.other_ids + ["Periods_col"]):
+                 key = name[:-1] if len(name[:-1]) > 1 else name[0]
+                 period = name[-1]
+                 if key not in result_dict:
+                     result_dict[key] = {
+                         "min": {"A": group["Year_values_col"].min()},
+                         "max": {"A": group["Year_values_col"].max()},
+                     }
+                 if period != "A":
+                     year_min = group["Year_values_col"].min()
+                     year_max = group["Year_values_col"].max()
+
+                     result_dict[key]["min"]["A"] = min(result_dict[key]["min"]["A"], year_min)
+                     result_dict[key]["max"]["A"] = max(result_dict[key]["max"]["A"], year_max)
+                     result_dict[key]["min"][period] = group["Periods_values_col"].min()
+                     result_dict[key]["max"][period] = group["Periods_values_col"].max()
+
+         else:
+             raise ValueError("Mode must be either 'all' or 'single'")
+         return result_dict
+
+     @classmethod
+     def fill_periods(cls, data: pd.DataFrame, fill_type: str) -> pd.DataFrame:
+         result_data = cls.period_filler(data, single=(fill_type != "all"))
+         not_na = result_data[cls.measures].notna().any(axis=1)
+         duplicated = result_data.duplicated(subset=(cls.other_ids + [cls.time_id]), keep=False)
+         return result_data[~duplicated | not_na]
+
+     @classmethod
+     def period_filler(cls, data: pd.DataFrame, single: bool = False) -> pd.DataFrame:
+         filled_data = []
+         max_min = cls.max_min_from_period(data, mode="single" if single else "all")
+         cls.periods = (
+             list(max_min[list(max_min.keys())[0]]["min"].keys())
+             if single
+             else list(max_min["min"].keys())
+         )
+         groups = data.groupby(cls.other_ids)
+
+         for group, group_df in groups:
+             period_limits = (
+                 max_min if not single else max_min[group if len(group) > 1 else group[0]]
+             )
+             years = list(range(period_limits["min"]["A"], period_limits["max"]["A"] + 1))
+             for period in cls.periods:
+                 if period == "A":
+                     filled_data.extend(cls.fill_periods_rows(group_df, period, years))
+                 else:
+                     if period in period_limits["min"] and period in period_limits["max"]:
+                         vals = list(
+                             range(
+                                 period_limits["min"][period],
+                                 period_limits["max"][period] + 1,
+                             )
+                         )
+                         filled_data.extend(
+                             cls.fill_periods_rows(group_df, period, years, vals=vals)
+                         )
+
+         filled_df = pd.concat(filled_data, ignore_index=True)
+         combined_data = pd.concat([filled_df, data], ignore_index=True)
+         if len(cls.periods) == 1 and cls.periods[0] == "A":
+             combined_data[cls.time_id] = combined_data[cls.time_id].astype(int)
+         else:
+             combined_data[cls.time_id] = combined_data[cls.time_id].astype(str)
+         return combined_data.sort_values(by=cls.other_ids + [cls.time_id])
+
+     @classmethod
+     def fill_periods_rows(
+         cls,
+         group_df: Any,
+         period: str,
+         years: List[int],
+         vals: Optional[List[int]] = None,
+     ) -> List[Any]:
+         rows = []
+         for year in years:
+             if period == "A":
+                 rows.append(cls.create_period_row(group_df, period, year))
+             elif vals is not None:
+                 for val in vals:
+                     rows.append(cls.create_period_row(group_df, period, year, val=val))
+         return rows
+
+     @classmethod
+     def create_period_row(
+         cls, group_df: Any, period: str, year: int, val: Optional[int] = None
+     ) -> Any:
+         row = group_df.iloc[0].copy()
+         row[cls.time_id] = f"{year}" if period == "A" else f"{year}-{period}{val:d}"
+         row[cls.measures] = None
+         return row.to_frame().T
+
+     @classmethod
+     def max_min_from_date(cls, data: pd.DataFrame, fill_type: str = "all") -> Dict[str, Any]:
+         def compute_min_max(group: Any) -> Dict[str, Any]:
+             min_date = cls.parse_date(group.min())
+             max_date = cls.parse_date(group.max())
+             date_format = cls.get_date_format(max_date)
+             return {"min": min_date, "max": max_date, "date_format": date_format}
+
+         if fill_type == "all":
+             return compute_min_max(data[cls.time_id])
+
+         grouped = data.groupby(cls.other_ids)
+         result_dict = {
+             name if len(name) > 1 else name[0]: compute_min_max(group[cls.time_id])
+             for name, group in grouped
+         }
+         return result_dict
+
+     @classmethod
+     def fill_dates(cls, data: pd.DataFrame, fill_type: str, min_frequency: str) -> pd.DataFrame:
+         result_data = cls.date_filler(data, fill_type, min_frequency)
+         not_na = result_data[cls.measures].notna().any(axis=1)
+         duplicated = result_data.duplicated(subset=(cls.other_ids + [cls.time_id]), keep=False)
+         return result_data[~duplicated | not_na]
+
+     @classmethod
+     def date_filler(cls, data: pd.DataFrame, fill_type: str, min_frequency: str) -> pd.DataFrame:
+         max_min = cls.max_min_from_date(data, fill_type)
+         date_format = None
+         filled_data = []
+
+         def create_filled_dates(group: Any, min_max: Dict[str, Any]) -> Tuple[pd.DataFrame, str]:
+             date_range = pd.date_range(start=min_max["min"], end=min_max["max"], freq=min_frequency)
+             date_df = pd.DataFrame(date_range, columns=[cls.time_id])
+             date_df[cls.other_ids] = group.iloc[0][cls.other_ids]
+             date_df[cls.measures] = None
+             return date_df, min_max["date_format"]
+
+         for name, group in data.groupby(cls.other_ids):
+             min_max = max_min if fill_type == "all" else max_min[name if len(name) > 1 else name[0]]
+             filled_dates, date_format = create_filled_dates(group, min_max)
+             filled_data.append(filled_dates)
+
+         filled_df = pd.concat(filled_data, ignore_index=True)
+         filled_df[cls.time_id] = filled_df[cls.time_id].dt.strftime(date_format)
+         combined_data = pd.concat([filled_df, data], ignore_index=True)
+         combined_data[cls.time_id] = combined_data[cls.time_id].astype(str)
+         return combined_data.sort_values(by=cls.other_ids + [cls.time_id])
+
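At its core, date_filler materializes the full pd.date_range between the observed minimum and maximum, marks the synthetic rows' measures as null, and concatenates them with the real rows (fill_dates then drops the synthetic duplicates). A standalone sketch of the same idea using an explicit merge, with "MS" (month start) standing in for the inferred frequency:

    import pandas as pd

    observed = pd.DataFrame({"ref_date": ["2020-01-01", "2020-03-01"], "obs": [1.0, 3.0]})
    full = pd.DataFrame(
        {"ref_date": pd.date_range("2020-01-01", "2020-03-01", freq="MS").strftime("%Y-%m-%d")}
    )
    filled = full.merge(observed, on="ref_date", how="left")
    print(filled)  # 2020-02-01 appears with obs = NaN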
+     @classmethod
+     def max_min_from_time(cls, data: pd.DataFrame, fill_type: str = "all") -> Dict[str, Any]:
+         data = data.applymap(str).sort_values(  # type: ignore[operator]
+             by=cls.other_ids + [cls.time_id]
+         )
+
+         def extract_max_min(group: Any) -> Dict[str, Any]:
+             start_dates = group.apply(lambda x: x.split("/")[0])
+             end_dates = group.apply(lambda x: x.split("/")[1])
+             return {
+                 "start": {"min": start_dates.min(), "max": start_dates.max()},
+                 "end": {"min": end_dates.min(), "max": end_dates.max()},
+             }
+
+         if fill_type == "all":
+             return extract_max_min(data[cls.time_id])
+         else:
+             return {
+                 name: extract_max_min(group[cls.time_id])
+                 for name, group in data.groupby(cls.other_ids)
+             }
+
+     @classmethod
+     def fill_time_intervals(
+         cls, data: pd.DataFrame, fill_type: str, frequency: str
+     ) -> pd.DataFrame:
+         result_data = cls.time_filler(data, fill_type, frequency)
+         not_na = result_data[cls.measures].notna().any(axis=1)
+         duplicated = result_data.duplicated(subset=(cls.other_ids + [cls.time_id]), keep=False)
+         return result_data[~duplicated | not_na]
+
+     @classmethod
+     def time_filler(cls, data: pd.DataFrame, fill_type: str, frequency: str) -> pd.DataFrame:
+         max_min = cls.max_min_from_time(data, fill_type)
+
+         def fill_group(group_df: pd.DataFrame) -> pd.DataFrame:
+             group_key = group_df.iloc[0][cls.other_ids].values
+             if fill_type != "all":
+                 group_key = group_key[0] if len(group_key) == 1 else tuple(group_key)
+             group_dict = max_min if fill_type == "all" else max_min[group_key]
+
+             intervals = [
+                 f"{group_dict['start']['min']}/{group_dict['end']['min']}",
+                 f"{group_dict['start']['max']}/{group_dict['end']['max']}",
+             ]
+             for interval in intervals:
+                 if interval not in group_df[cls.time_id].values:
+                     empty_row = group_df.iloc[0].copy()
+                     empty_row[cls.time_id] = interval
+                     empty_row[cls.measures] = None
+                     group_df = pd.concat(
+                         [group_df, empty_row.to_frame().T], ignore_index=True
+                     )
+             start_group_df = group_df.copy()
+             start_group_df[cls.time_id] = start_group_df[cls.time_id].apply(
+                 lambda x: x.split("/")[0]
+             )
+             end_group_df = group_df.copy()
+             end_group_df[cls.time_id] = end_group_df[cls.time_id].apply(lambda x: x.split("/")[1])
+             start_filled = cls.date_filler(start_group_df, fill_type, frequency)
+             end_filled = cls.date_filler(end_group_df, fill_type, frequency)
+             start_filled[cls.time_id] = start_filled[cls.time_id].str.cat(
+                 end_filled[cls.time_id], sep="/"
+             )
+             return start_filled
+
+         filled_data = [fill_group(group_df) for _, group_df in data.groupby(cls.other_ids)]
+         return (
+             pd.concat(filled_data, ignore_index=True)
+             .sort_values(by=cls.other_ids + [cls.time_id])
+             .drop_duplicates()
+         )
+
+
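time_filler reuses date_filler by splitting each "start/end" interval into two date columns, filling the gaps in each independently, and zipping the results back together with str.cat. Schematically:

    intervals = ["2020-01-01/2020-01-31", "2020-03-01/2020-03-31"]
    starts = [i.split("/")[0] for i in intervals]   # gap-filled like a plain date column
    ends = [i.split("/")[1] for i in intervals]     # gap-filled like a plain date column
    rebuilt = [f"{s}/{e}" for s, e in zip(starts, ends)]
    assert rebuilt == intervals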
+ class Time_Shift(Binary):
+     op = TIMESHIFT
+
+     @classmethod
+     def evaluate(cls, operand: Dataset, shift_value: Any) -> Dataset:
+         result = cls.validate(operand, shift_value)
+         result.data = operand.data.copy() if operand.data is not None else pd.DataFrame()
+         shift_value = int(shift_value.value)
+         cls.time_id = cls._get_time_id(result)
+
+         data_type: Any = result.components[cls.time_id].data_type
+
+         if data_type == Date:
+             freq = cls.find_min_frequency(
+                 cls.get_frequencies(
+                     result.data[cls.time_id].map(cls.parse_date, na_action="ignore")
+                 )
+             )
+             result.data[cls.time_id] = cls.shift_dates(result.data[cls.time_id], shift_value, freq)
+         elif data_type == TimeInterval:
+             freq = cls.get_frequency_from_time(result.data[cls.time_id].iloc[0])
+             result.data[cls.time_id] = result.data[cls.time_id].apply(
+                 lambda x: cls.shift_interval(x, shift_value, freq)
+             )
+         elif data_type == TimePeriod:
+             periods = result.data[cls.time_id].apply(cls._get_period).unique()
+             result.data[cls.time_id] = result.data[cls.time_id].apply(
+                 lambda x: cls.shift_period(x, shift_value)
+             )
+             if len(periods) == 1 and periods[0] == "A":
+                 result.data[cls.time_id] = result.data[cls.time_id].astype(int)
+         else:
+             raise SemanticError("1-1-19-2", op=cls.op)
+         return result
+
+     @classmethod
+     def validate(cls, operand: Dataset, shift_value: str) -> Dataset:
+         dataset_name = VirtualCounter._new_ds_name()
+         if cls._get_time_id(operand) is None:
+             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
+         return Dataset(name=dataset_name, components=operand.components.copy(), data=None)
+
+     @classmethod
+     def shift_dates(cls, dates: Any, shift_value: int, frequency: str) -> Any:
+         dates = pd.to_datetime(dates)
+         if frequency == "D":
+             return dates + pd.to_timedelta(shift_value, unit="D")
+         elif frequency == "W":
+             return dates + pd.to_timedelta(shift_value, unit="W")
+         elif frequency == "Y":
+             return dates + pd.DateOffset(years=shift_value)
+         elif frequency in ["M", "Q", "S"]:
+             return dates + pd.DateOffset(months=shift_value)
+         raise SemanticError("2-1-19-2", period=frequency)
+
+     @classmethod
+     def shift_period(
+         cls, period_str: str, shift_value: int, frequency: Optional[int] = None
+     ) -> str:
+         period_type = cls._get_period(period_str)
+
+         if period_type == "A":
+             return str(int(period_str) + shift_value)
+
+         if frequency:
+             shift_value *= frequency
+
+         tp_value = TimePeriodHandler(period_str)
+         year, period, value = (
+             tp_value.year,
+             tp_value.period_indicator,
+             tp_value.period_number + shift_value,
+         )
+         period_limit = cls.YEAR_TO_PERIOD[period]
+
+         if value <= 0 or value > period_limit:
+             # floor division carries whole over/underflows into the year
+             year += (value - 1) // period_limit
+             value = (value - 1) % period_limit + 1
+
+         return f"{year}-{period}{value}"
+
+     @classmethod
+     def shift_interval(cls, interval: str, shift_value: Any, frequency: str) -> str:
+         start_date, end_date = interval.split("/")
+         start_date = cls.shift_dates(start_date, shift_value, frequency)
+         end_date = cls.shift_dates(end_date, shift_value, frequency)
+         return f"{start_date}/{end_date}"
+
+
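The wrap-around arithmetic in shift_period keeps the period number within 1..period_limit and carries whole overflows (in either direction) into the year, with YEAR_TO_PERIOD giving the limit (4 for quarters, 12 for months, and so on). A standalone sketch for the quarterly case:

    def shift_quarter(year: int, quarter: int, shift: int) -> str:
        value = quarter + shift
        year += (value - 1) // 4          # floor division also handles negative shifts
        value = (value - 1) % 4 + 1
        return f"{year}-Q{value}"

    assert shift_quarter(2020, 4, 1) == "2021-Q1"
    assert shift_quarter(2020, 1, -1) == "2019-Q4"
    assert shift_quarter(2020, 1, -5) == "2018-Q4"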
+ class Time_Aggregation(Time):
+     op = TIME_AGG
+
+     @classmethod
+     def _check_duration(cls, value: str) -> None:
+         if value not in PERIOD_IND_MAPPING:
+             raise SemanticError("1-1-19-3", op=cls.op, param="duration")
+
+     @classmethod
+     def _check_params(cls, period_from: Optional[str], period_to: str) -> None:
+         cls._check_duration(period_to)
+         if period_from is not None:
+             cls._check_duration(period_from)
+             if PERIOD_IND_MAPPING[period_to] <= PERIOD_IND_MAPPING[period_from]:
+                 # OPERATORS_TIMEOPERATORS.19
+                 raise SemanticError("1-1-19-4", op=cls.op, value_1=period_from, value_2=period_to)
+
+     @classmethod
+     def dataset_validation(
+         cls, operand: Dataset, period_from: Optional[str], period_to: str, conf: str
+     ) -> Dataset:
+         # TODO: Review with VTL TF as this makes no sense
+
+         count_time_types = 0
+         for measure in operand.get_measures():
+             if measure.data_type in cls.TIME_DATA_TYPES:
+                 count_time_types += 1
+                 if measure.data_type == TimePeriod and period_to == "D":
+                     raise SemanticError("1-1-19-5", op=cls.op)
+                 if measure.data_type == TimeInterval:
+                     raise SemanticError("1-1-19-6", op=cls.op, comp=measure.name)
+         if count_time_types != 1:
+             raise SemanticError(
+                 "1-1-19-9", op=cls.op, comp_type="dataset", param="single time measure"
+             )
+
+         count_time_types = 0
+         for id_ in operand.get_identifiers():
+             if id_.data_type in cls.TIME_DATA_TYPES:
+                 count_time_types += 1
+         if count_time_types != 1:
+             raise SemanticError(
+                 "1-1-19-9",
+                 op=cls.op,
+                 comp_type="dataset",
+                 param="single time identifier",
+             )
+
+         result_components = {
+             comp.name: comp
+             for comp in operand.components.values()
+             if comp.role in [Role.IDENTIFIER, Role.MEASURE]
+         }
+
+         return Dataset(name=operand.name, components=result_components, data=None)
+
+     @classmethod
+     def component_validation(
+         cls,
+         operand: DataComponent,
+         period_from: Optional[str],
+         period_to: str,
+         conf: str,
+     ) -> DataComponent:
+         if operand.data_type not in cls.TIME_DATA_TYPES:
+             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time component")
+         if operand.data_type == TimePeriod and period_to == "D":
+             raise SemanticError("1-1-19-5", op=cls.op)
+         if operand.data_type == TimeInterval:
+             raise SemanticError("1-1-19-6", op=cls.op, comp=operand.name)
+
+         return DataComponent(
+             name=operand.name, data_type=operand.data_type, data=None, nullable=operand.nullable
+         )
+
+     @classmethod
+     def scalar_validation(
+         cls, operand: Scalar, period_from: Optional[str], period_to: str, conf: str
+     ) -> Scalar:
+         if operand.data_type not in cls.TIME_DATA_TYPES:
+             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time scalar")
+
+         return Scalar(name=operand.name, data_type=operand.data_type, value=None)
+
+     @classmethod
+     def _execute_time_aggregation(
+         cls,
+         value: str,
+         data_type: Type[ScalarType],
+         period_from: Optional[str],
+         period_to: str,
+         conf: str,
+     ) -> str:
+         if data_type == TimePeriod:  # Time period
+             return _time_period_access(value, period_to)
+
+         elif data_type == Date:
+             start = conf == "first"
+             # Date
+             if period_to == "D":
+                 return value
+             return _date_access(value, period_to, start)
+         else:
+             raise NotImplementedError
+
+     @classmethod
+     def dataset_evaluation(
+         cls, operand: Dataset, period_from: Optional[str], period_to: str, conf: str
+     ) -> Dataset:
+         result = cls.dataset_validation(operand, period_from, period_to, conf)
+         result.data = operand.data.copy() if operand.data is not None else pd.DataFrame()
+         time_measure = [m for m in operand.get_measures() if m.data_type in cls.TIME_DATA_TYPES][0]
+         result.data[time_measure.name] = result.data[time_measure.name].map(
+             lambda x: cls._execute_time_aggregation(
+                 x, time_measure.data_type, period_from, period_to, conf
+             ),
+             na_action="ignore",
+         )
+
+         return result
+
+     @classmethod
+     def component_evaluation(
+         cls,
+         operand: DataComponent,
+         period_from: Optional[str],
+         period_to: str,
+         conf: str,
+     ) -> DataComponent:
+         result = cls.component_validation(operand, period_from, period_to, conf)
+         if operand.data is not None:
+             result.data = operand.data.map(
+                 lambda x: cls._execute_time_aggregation(
+                     x, operand.data_type, period_from, period_to, conf
+                 ),
+                 na_action="ignore",
+             )
+         return result
+
+     @classmethod
+     def scalar_evaluation(
+         cls, operand: Scalar, period_from: Optional[str], period_to: str, conf: str
+     ) -> Scalar:
+         result = cls.scalar_validation(operand, period_from, period_to, conf)
+         result.value = cls._execute_time_aggregation(
+             operand.value, operand.data_type, period_from, period_to, conf
+         )
+         return result
+
+     @classmethod
+     def validate(
+         cls,
+         operand: Union[Dataset, DataComponent, Scalar],
+         period_from: Optional[str],
+         period_to: str,
+         conf: str,
+     ) -> Union[Dataset, DataComponent, Scalar]:
+         cls._check_params(period_from, period_to)
+         if isinstance(operand, Dataset):
+             return cls.dataset_validation(operand, period_from, period_to, conf)
+         elif isinstance(operand, DataComponent):
+             return cls.component_validation(operand, period_from, period_to, conf)
+         else:
+             return cls.scalar_validation(operand, period_from, period_to, conf)
+
+     @classmethod
+     def evaluate(
+         cls,
+         operand: Union[Dataset, DataComponent, Scalar],
+         period_from: Optional[str],
+         period_to: str,
+         conf: str,
+     ) -> Union[Dataset, DataComponent, Scalar]:
+         cls._check_params(period_from, period_to)
+         if isinstance(operand, Dataset):
+             return cls.dataset_evaluation(operand, period_from, period_to, conf)
+         elif isinstance(operand, DataComponent):
+             return cls.component_evaluation(operand, period_from, period_to, conf)
+         else:
+             return cls.scalar_evaluation(operand, period_from, period_to, conf)
+
+
+ def _time_period_access(v: Any, to_param: str) -> Any:
+     v = TimePeriodHandler(v)
+     if v.period_indicator == to_param:
+         return str(v)
+     v.change_indicator(to_param)
+     return str(v)
+
+
+ def _date_access(v: str, to_param: str, start: bool) -> Any:
+     period_value = date_to_period(date.fromisoformat(v), to_param)
+     if start:
+         return period_value.start_date()
+     return period_value.end_date()
+
+
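For dates, time_agg truncates the value to the enclosing period and keeps either its first or last day depending on conf; _date_access delegates that to date_to_period. The quarterly case, sketched standalone:

    from datetime import date, timedelta

    def quarter_bounds(d: date) -> tuple:
        q = (d.month - 1) // 3                      # 0-based quarter index
        start = date(d.year, 3 * q + 1, 1)
        if q == 3:
            end = date(d.year, 12, 31)
        else:
            # first day of the next quarter, minus one day
            end = date(d.year, 3 * q + 4, 1) - timedelta(days=1)
        return start, end

    assert quarter_bounds(date(2020, 5, 17)) == (date(2020, 4, 1), date(2020, 6, 30))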
+ class Current_Date(Time):
+     @classmethod
+     def validate(cls) -> Scalar:
+         return Scalar(name="current_date", data_type=Date, value=None)
+
+     @classmethod
+     def evaluate(cls) -> Scalar:
+         result = cls.validate()
+         result.value = date.today().isoformat()
+         return result
+
+
+ class SimpleBinaryTime(Operators.Binary):
+     @classmethod
+     def validate_type_compatibility(cls, left: Any, right: Any) -> bool:
+         # Date and TimePeriod operands cannot be mixed, in either order.
+         if left == Date and right == TimePeriod:
+             return False
+         if left == TimePeriod and right == Date:
+             return False
+         return True
+
+     @classmethod
+     def validate(
+         cls,
+         left_operand: Union[Dataset, DataComponent, Scalar],
+         right_operand: Union[Dataset, DataComponent, Scalar],
+     ) -> Union[Dataset, DataComponent, Scalar]:
+         if isinstance(left_operand, Dataset) or isinstance(right_operand, Dataset):
+             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
+         if not cls.validate_type_compatibility(left_operand.data_type, right_operand.data_type):
+             raise SemanticError(
+                 "1-1-1-2",
+                 type_1=left_operand.data_type,
+                 type_2=right_operand.data_type,
+                 type_check=cls.type_to_check,
+             )
+         return super().validate(left_operand, right_operand)
+
+     @classmethod
+     def evaluate(
+         cls,
+         left_operand: Union[Dataset, DataComponent, Scalar],
+         right_operand: Union[Dataset, DataComponent, Scalar],
+     ) -> Union[Dataset, DataComponent, Scalar]:
+         if isinstance(left_operand, Dataset) or isinstance(right_operand, Dataset):
+             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
+         cls.validate(left_operand, right_operand)
+         return super().evaluate(left_operand, right_operand)
+
+
+ class Date_Diff(SimpleBinaryTime):
+     op = DATEDIFF
+     type_to_check = TimeInterval
+     return_type = Integer
+
+     @classmethod
+     def py_op(cls, x: Any, y: Any) -> int:
+         if (x.count("/") >= 1) or (y.count("/") >= 1):
+             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
+
+         if x.count("-") == 2:
+             fecha1 = datetime.strptime(x, "%Y-%m-%d").date()
+         else:
+             fecha1 = TimePeriodHandler(x).end_date(as_date=True)  # type: ignore[assignment]
+
+         if y.count("-") == 2:
+             fecha2 = datetime.strptime(y, "%Y-%m-%d").date()
+         else:
+             fecha2 = TimePeriodHandler(y).end_date(as_date=True)  # type: ignore[assignment]
+
+         return abs((fecha2 - fecha1).days)
+
+
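datediff accepts dates and time periods in either argument, collapsing a period to its end date before taking the absolute day difference. So comparing "2020-01-01" with period "2020-Q1" amounts to:

    from datetime import date

    d1 = date.fromisoformat("2020-01-01")
    d2 = date(2020, 3, 31)          # end date of "2020-Q1"
    assert abs((d2 - d1).days) == 90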
+ class Date_Add(Parametrized):
+     op = DATE_ADD
+
+     @classmethod
+     def validate(
+         cls, operand: Union[Scalar, DataComponent, Dataset], param_list: List[Scalar]
+     ) -> Union[Scalar, DataComponent, Dataset]:
+         dataset_name = VirtualCounter._new_ds_name()
+         expected_types = [Integer, String]
+         for i, param in enumerate(param_list):
+             error = (
+                 12
+                 if not isinstance(param, Scalar)  # type: ignore[redundant-expr]
+                 else 13
+                 if (param.data_type != expected_types[i])
+                 else None
+             )
+             if error is not None:
+                 raise SemanticError(
+                     f"2-1-19-{error}",
+                     op=cls.op,
+                     type=(param.__class__.__name__ if error == 12 else param.data_type.__name__),
+                     name="shiftNumber" if error == 12 else "periodInd",
+                     expected="Scalar" if error == 12 else expected_types[i].__name__,
+                 )
+
+         if isinstance(operand, (Scalar, DataComponent)) and operand.data_type not in [
+             Date,
+             TimePeriod,
+         ]:
+             unary_implicit_promotion(operand.data_type, Date)
+
+         if isinstance(operand, Scalar):
+             return Scalar(name=operand.name, data_type=operand.data_type, value=None)
+         if isinstance(operand, DataComponent):
+             return DataComponent(
+                 name=operand.name, data_type=operand.data_type, data=None, nullable=operand.nullable
+             )
+
+         if all(comp.data_type not in [Date, TimePeriod] for comp in operand.components.values()):
+             raise SemanticError("2-1-19-14", op=cls.op, name=operand.name)
+         return Dataset(name=dataset_name, components=operand.components.copy(), data=None)
+
+     @classmethod
+     def evaluate(
+         cls, operand: Union[Scalar, DataComponent, Dataset], param_list: List[Scalar]
+     ) -> Union[Scalar, DataComponent, Dataset]:
+         result = cls.validate(operand, param_list)
+         shift, period = param_list[0].value, param_list[1].value
+         is_tp = isinstance(operand, (Scalar, DataComponent)) and operand.data_type == TimePeriod
+
+         if isinstance(result, Scalar) and isinstance(operand, Scalar) and operand.value is not None:
+             result.value = cls.py_op(operand.value, shift, period, is_tp)
+         elif (
+             isinstance(result, DataComponent)
+             and isinstance(operand, DataComponent)
+             and operand.data is not None
+         ):
+             result.data = operand.data.map(
+                 lambda x: cls.py_op(x, shift, period, is_tp), na_action="ignore"
+             )
+         elif (
+             isinstance(result, Dataset)
+             and isinstance(operand, Dataset)
+             and operand.data is not None
+         ):
+             result.data = operand.data.copy()
+             for measure in operand.get_measures():
+                 if measure.data_type in [Date, TimePeriod]:
+                     result.data[measure.name] = result.data[measure.name].map(
+                         lambda x: cls.py_op(str(x), shift, period, measure.data_type == TimePeriod),
+                         na_action="ignore",
+                     )
+                     measure.data_type = Date
+
+         if isinstance(result, (Scalar, DataComponent)):
+             result.data_type = Date
+         return result
+
+     @classmethod
+     def py_op(cls, date_str: str, shift: int, period: str, is_tp: bool = False) -> str:
+         if is_tp:
+             tp_value = TimePeriodHandler(date_str)
+             base = period_to_date(tp_value.year, tp_value.period_indicator, tp_value.period_number)
+         else:
+             base = datetime.strptime(date_str, "%Y-%m-%d")
+
+         if period in ["D", "W"]:
+             days_shift = shift * (7 if period == "W" else 1)
+             return (base + timedelta(days=days_shift)).strftime("%Y-%m-%d")
+
+         month_shift = {"M": 1, "Q": 3, "S": 6, "A": 12}[period] * shift
+         new_year = base.year + (base.month - 1 + month_shift) // 12
+         new_month = (base.month - 1 + month_shift) % 12 + 1
+         last_day = (datetime(new_year, new_month % 12 + 1, 1) - timedelta(days=1)).day
+         return base.replace(year=new_year, month=new_month, day=min(base.day, last_day)).strftime(
+             "%Y-%m-%d"
+         )
+
+
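The month arithmetic in Date_Add.py_op clamps the day to the last day of the target month, so an end-of-month date stays an end-of-month date rather than overflowing. Isolated:

    from datetime import datetime, timedelta

    def add_months(d: datetime, months: int) -> datetime:
        new_year = d.year + (d.month - 1 + months) // 12
        new_month = (d.month - 1 + months) % 12 + 1
        # .day of the last day of the target month (for December the computed
        # date lands in the prior year, but the day number, 31, is the same)
        last_day = (datetime(new_year, new_month % 12 + 1, 1) - timedelta(days=1)).day
        return d.replace(year=new_year, month=new_month, day=min(d.day, last_day))

    assert add_months(datetime(2020, 1, 31), 1) == datetime(2020, 2, 29)
    assert add_months(datetime(2020, 11, 15), 2) == datetime(2021, 1, 15)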
+ class SimpleUnaryTime(Operators.Unary):
+     @classmethod
+     def validate(
+         cls, operand: Union[Dataset, DataComponent, Scalar]
+     ) -> Union[Dataset, DataComponent, Scalar]:
+         if isinstance(operand, Dataset):
+             raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
+
+         # Limit the operand to Date, TimePeriod and Duration
+         # (cannot be implemented with type_to_check)
+         if operand.data_type not in (Date, TimePeriod, Duration):
+             raise SemanticError("1-1-19-10", op=cls.op)
+
+         return super().validate(operand)
+
+     @classmethod
+     def evaluate(
+         cls, operand: Union[Dataset, DataComponent, Scalar]
+     ) -> Union[Dataset, DataComponent, Scalar]:
+         cls.validate(operand)
+         return super().evaluate(operand)
+
+
+ class Year(SimpleUnaryTime):
+     op = YEAR
+     return_type = Integer
+
+     @classmethod
+     def py_op(cls, value: str) -> int:
+         return int(value[:4])
+
+
+ class Month(SimpleUnaryTime):
+     op = MONTH
+     return_type = Integer
+
+     @classmethod
+     def py_op(cls, value: str) -> int:
+         if value.count("-") == 2:
+             return date.fromisoformat(value).month
+
+         result = TimePeriodHandler(value).start_date(as_date=True)
+         return result.month  # type: ignore[union-attr]
+
+
+ class Day_of_Month(SimpleUnaryTime):
+     op = DAYOFMONTH
+     return_type = Integer
+
+     @classmethod
+     def py_op(cls, value: str) -> int:
+         if value.count("-") == 2:
+             return date.fromisoformat(value).day
+
+         result = TimePeriodHandler(value).end_date(as_date=True)
+         return result.day  # type: ignore[union-attr]
+
+
+ class Day_of_Year(SimpleUnaryTime):
+     op = DAYOFYEAR
+     return_type = Integer
+
+     @classmethod
+     def py_op(cls, value: str) -> int:
+         if value.count("-") == 2:
+             day_y = datetime.strptime(value, "%Y-%m-%d")
+             return day_y.timetuple().tm_yday
+
+         result = TimePeriodHandler(value).end_date(as_date=True)
+         datetime_value = datetime(
+             year=result.year,  # type: ignore[union-attr]
+             month=result.month,  # type: ignore[union-attr]
+             day=result.day,  # type: ignore[union-attr]
+         )
+         return datetime_value.timetuple().tm_yday
+
+
+ class Day_to_Year(Operators.Unary):
+     op = DAYTOYEAR
+     return_type = Duration
+
+     @classmethod
+     def py_op(cls, value: int) -> str:
+         if value < 0:
+             raise RunTimeError("2-1-19-16", op=cls.op)
+         years = value // 365
+         days_remaining = value % 365
+         return f"P{int(years)}Y{int(days_remaining)}D"
+
+
+ class Day_to_Month(Operators.Unary):
+     op = DAYTOMONTH
+     return_type = Duration
+
+     @classmethod
+     def py_op(cls, value: int) -> str:
+         if value < 0:
+             raise RunTimeError("2-1-19-16", op=cls.op)
+         months = value // 30
+         days_remaining = value % 30
+         return f"P{int(months)}M{int(days_remaining)}D"
+
+
+ class Year_to_Day(Operators.Unary):
+     op = YEARTODAY
+     return_type = Integer
+
+     @classmethod
+     def py_op(cls, value: str) -> int:
+         return Duration.to_days(value)
+
+
+ class Month_to_Day(Operators.Unary):
+     op = MONTHTODAY
+     return_type = Integer
+
+     @classmethod
+     def py_op(cls, value: str) -> int:
+         return Duration.to_days(value)
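The duration conversions use fixed 365-day years and 30-day months, so they are approximations rather than calendar-exact: daytoyear(400) gives "P1Y35D" and daytomonth(400) gives "P13M10D". A standalone sketch of the forward direction:

    def day_to_year(days: int) -> str:
        return f"P{days // 365}Y{days % 365}D"

    def day_to_month(days: int) -> str:
        return f"P{days // 30}M{days % 30}D"

    assert day_to_year(400) == "P1Y35D"
    assert day_to_month(400) == "P13M10D"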