spotforecast2-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. spotforecast2/.DS_Store +0 -0
  2. spotforecast2/__init__.py +2 -0
  3. spotforecast2/data/__init__.py +0 -0
  4. spotforecast2/data/data.py +130 -0
  5. spotforecast2/data/fetch_data.py +209 -0
  6. spotforecast2/exceptions.py +681 -0
  7. spotforecast2/forecaster/.DS_Store +0 -0
  8. spotforecast2/forecaster/__init__.py +7 -0
  9. spotforecast2/forecaster/base.py +448 -0
  10. spotforecast2/forecaster/metrics.py +527 -0
  11. spotforecast2/forecaster/recursive/__init__.py +4 -0
  12. spotforecast2/forecaster/recursive/_forecaster_equivalent_date.py +1075 -0
  13. spotforecast2/forecaster/recursive/_forecaster_recursive.py +939 -0
  14. spotforecast2/forecaster/recursive/_warnings.py +15 -0
  15. spotforecast2/forecaster/utils.py +954 -0
  16. spotforecast2/model_selection/__init__.py +5 -0
  17. spotforecast2/model_selection/bayesian_search.py +453 -0
  18. spotforecast2/model_selection/grid_search.py +314 -0
  19. spotforecast2/model_selection/random_search.py +151 -0
  20. spotforecast2/model_selection/split_base.py +357 -0
  21. spotforecast2/model_selection/split_one_step.py +245 -0
  22. spotforecast2/model_selection/split_ts_cv.py +634 -0
  23. spotforecast2/model_selection/utils_common.py +718 -0
  24. spotforecast2/model_selection/utils_metrics.py +103 -0
  25. spotforecast2/model_selection/validation.py +685 -0
  26. spotforecast2/preprocessing/__init__.py +30 -0
  27. spotforecast2/preprocessing/_binner.py +378 -0
  28. spotforecast2/preprocessing/_common.py +123 -0
  29. spotforecast2/preprocessing/_differentiator.py +123 -0
  30. spotforecast2/preprocessing/_rolling.py +136 -0
  31. spotforecast2/preprocessing/curate_data.py +254 -0
  32. spotforecast2/preprocessing/imputation.py +92 -0
  33. spotforecast2/preprocessing/outlier.py +114 -0
  34. spotforecast2/preprocessing/split.py +139 -0
  35. spotforecast2/py.typed +0 -0
  36. spotforecast2/utils/__init__.py +43 -0
  37. spotforecast2/utils/convert_to_utc.py +44 -0
  38. spotforecast2/utils/data_transform.py +208 -0
  39. spotforecast2/utils/forecaster_config.py +344 -0
  40. spotforecast2/utils/generate_holiday.py +70 -0
  41. spotforecast2/utils/validation.py +569 -0
  42. spotforecast2/weather/__init__.py +0 -0
  43. spotforecast2/weather/weather_client.py +288 -0
  44. spotforecast2-0.0.1.dist-info/METADATA +47 -0
  45. spotforecast2-0.0.1.dist-info/RECORD +46 -0
  46. spotforecast2-0.0.1.dist-info/WHEEL +4 -0
spotforecast2/model_selection/split_base.py
@@ -0,0 +1,357 @@
+ """
+ Base class for time series cross-validation splitting.
+ """
+
+ from __future__ import annotations
+ import warnings
+ import numpy as np
+ import pandas as pd
+ from spotforecast2.exceptions import IgnoredArgumentWarning
+
+
+ class BaseFold:
+     """
+     Base class for all Fold classes in spotforecast. All fold classes should specify
+     all the parameters that can be set at the class level in their ``__init__``.
+
+     Args:
+         steps (int, optional): Number of observations to be predicted in each fold.
+             This is also commonly referred to as the forecast horizon or test size.
+             Defaults to None.
+         initial_train_size (int | str | pd.Timestamp, optional): Number of observations
+             used for initial training.
+
+             - If an integer, the number of observations used for initial training.
+             - If a date string or pandas Timestamp, it is the last date included in
+               the initial training set.
+             Defaults to None.
+         fold_stride (int, optional): Number of observations that the start of the test
+             set advances between consecutive folds.
+
+             - If `None`, it defaults to the same value as `steps`, meaning that folds
+               are placed back-to-back without overlap.
+             - If `fold_stride < steps`, test sets overlap and multiple forecasts will
+               be generated for the same observations.
+             - If `fold_stride > steps`, gaps are left between consecutive test sets.
+             Defaults to None.
+         window_size (int, optional): Number of observations needed to generate the
+             autoregressive predictors. Defaults to None.
+         differentiation (int, optional): Number of observations to use for differentiation.
+             This is used to extend the `last_window` by as many observations as the
+             differentiation order. Defaults to None.
+         refit (bool | int, optional): Whether to refit the forecaster in each fold.
+
+             - If `True`, the forecaster is refitted in each fold.
+             - If `False`, the forecaster is trained only in the first fold.
+             - If an integer, the forecaster is trained in the first fold and then refitted
+               every `refit` folds.
+             Defaults to False.
+         fixed_train_size (bool, optional): Whether the training size is fixed or increases
+             in each fold. Defaults to True.
+         gap (int, optional): Number of observations between the end of the training set
+             and the start of the test set. Defaults to 0.
+         skip_folds (int | list, optional): Number of folds to skip.
+
+             - If an integer, every `skip_folds`-th fold is returned.
+             - If a list, the indexes of the folds to skip.
+
+             For example, if `skip_folds=3` and there are 10 folds, the returned folds are
+             0, 3, 6, and 9. If `skip_folds=[1, 2, 3]`, the returned folds are 0, 4, 5, 6, 7,
+             8, and 9. Defaults to None.
+         allow_incomplete_fold (bool, optional): Whether to allow the last fold to include
+             fewer observations than `steps`. If `False`, the last fold is excluded if it
+             is incomplete. Defaults to True.
+         return_all_indexes (bool, optional): Whether to return all indexes or only the
+             start and end indexes of each fold. Defaults to False.
+         verbose (bool, optional): Whether to print information about generated folds.
+             Defaults to True.
+
+     Attributes:
+         initial_train_size (int): Number of observations used for initial training.
+         window_size (int): Number of observations needed to generate the
+             autoregressive predictors.
+         differentiation (int): Number of observations to use for differentiation.
+             This is used to extend the `last_window` by as many observations as the
+             differentiation order.
+         return_all_indexes (bool): Whether to return all indexes or only the start
+             and end indexes of each fold.
+         verbose (bool): Whether to print information about generated folds.
+     """
+
+     def __init__(
+         self,
+         steps: int | None = None,
+         initial_train_size: int | str | pd.Timestamp | None = None,
+         fold_stride: int | None = None,
+         window_size: int | None = None,
+         differentiation: int | None = None,
+         refit: bool | int = False,
+         fixed_train_size: bool = True,
+         gap: int = 0,
+         skip_folds: int | list[int] | None = None,
+         allow_incomplete_fold: bool = True,
+         return_all_indexes: bool = False,
+         verbose: bool = True,
+     ) -> None:
+
+         self._validate_params(
+             cv_name=type(self).__name__,
+             steps=steps,
+             initial_train_size=initial_train_size,
+             fold_stride=fold_stride,
+             window_size=window_size,
+             differentiation=differentiation,
+             refit=refit,
+             fixed_train_size=fixed_train_size,
+             gap=gap,
+             skip_folds=skip_folds,
+             allow_incomplete_fold=allow_incomplete_fold,
+             return_all_indexes=return_all_indexes,
+             verbose=verbose,
+         )
+
+         self.initial_train_size = initial_train_size
+         self.window_size = window_size
+         self.differentiation = differentiation
+         self.return_all_indexes = return_all_indexes
+         self.verbose = verbose
+
+     def _validate_params(
+         self,
+         cv_name: str,
+         steps: int | None = None,
+         initial_train_size: int | str | pd.Timestamp | None = None,
+         fold_stride: int | None = None,
+         window_size: int | None = None,
+         differentiation: int | None = None,
+         refit: bool | int = False,
+         fixed_train_size: bool = True,
+         gap: int = 0,
+         skip_folds: int | list[int] | None = None,
+         allow_incomplete_fold: bool = True,
+         return_all_indexes: bool = False,
+         verbose: bool = True,
+         **kwargs,
+     ) -> None:
+         """
+         Validate all input parameters to ensure correctness.
+         """
+
+         if cv_name == "TimeSeriesFold":
+             if not isinstance(steps, (int, np.integer)) or steps < 1:
+                 raise ValueError(
+                     f"`steps` must be an integer greater than 0. Got {steps}."
+                 )
+             if not isinstance(
+                 initial_train_size, (int, np.integer, str, pd.Timestamp, type(None))
+             ):
+                 raise ValueError(
+                     f"`initial_train_size` must be an integer greater than 0, a date "
+                     f"string, a pandas Timestamp, or None. Got {initial_train_size}."
+                 )
+             if (
+                 isinstance(initial_train_size, (int, np.integer))
+                 and initial_train_size < 1
+             ):
+                 raise ValueError(
+                     f"`initial_train_size` must be an integer greater than 0, "
+                     f"a date string, a pandas Timestamp, or None. Got {initial_train_size}."
+                 )
+             if fold_stride is not None:
+                 if not isinstance(fold_stride, (int, np.integer)) or fold_stride < 1:
+                     raise ValueError(
+                         f"`fold_stride` must be an integer greater than 0. Got {fold_stride}."
+                     )
+             if not isinstance(refit, (bool, int, np.integer)):
+                 raise TypeError(
+                     f"`refit` must be a boolean or an integer equal to or greater than 0. "
+                     f"Got {refit}."
+                 )
+             if (
+                 isinstance(refit, (int, np.integer))
+                 and not isinstance(refit, bool)
+                 and refit < 0
+             ):
+                 raise TypeError(
+                     f"`refit` must be a boolean or an integer equal to or greater than 0. "
+                     f"Got {refit}."
+                 )
+             if not isinstance(fixed_train_size, bool):
+                 raise TypeError(
+                     f"`fixed_train_size` must be a boolean: `True`, `False`. "
+                     f"Got {fixed_train_size}."
+                 )
+             if not isinstance(gap, (int, np.integer)) or gap < 0:
+                 raise ValueError(
+                     f"`gap` must be an integer greater than or equal to 0. Got {gap}."
+                 )
+             if skip_folds is not None:
+                 if not isinstance(skip_folds, (int, np.integer, list, type(None))):
+                     raise TypeError(
+                         f"`skip_folds` must be an integer greater than 0, a list of "
+                         f"integers or `None`. Got {skip_folds}."
+                     )
+                 if isinstance(skip_folds, (int, np.integer)) and skip_folds < 1:
+                     raise ValueError(
+                         f"`skip_folds` must be an integer greater than 0, a list of "
+                         f"integers or `None`. Got {skip_folds}."
+                     )
+                 if isinstance(skip_folds, list) and any([x < 1 for x in skip_folds]):
+                     raise ValueError(
+                         f"`skip_folds` list must contain integers greater than or "
+                         f"equal to 1. The first fold is always needed to train the "
+                         f"forecaster. Got {skip_folds}."
+                     )
+             if not isinstance(allow_incomplete_fold, bool):
+                 raise TypeError(
+                     f"`allow_incomplete_fold` must be a boolean: `True`, `False`. "
+                     f"Got {allow_incomplete_fold}."
+                 )
+
+         if cv_name == "OneStepAheadFold":
+             if not isinstance(initial_train_size, (int, np.integer, str, pd.Timestamp)):
+                 raise ValueError(
+                     f"`initial_train_size` must be an integer greater than 0, a date "
+                     f"string, or a pandas Timestamp. Got {initial_train_size}."
+                 )
+             if (
+                 isinstance(initial_train_size, (int, np.integer))
+                 and initial_train_size < 1
+             ):
+                 raise ValueError(
+                     f"`initial_train_size` must be an integer greater than 0, "
+                     f"a date string, or a pandas Timestamp. Got {initial_train_size}."
+                 )
+
+         if (
+             not isinstance(window_size, (int, np.integer, pd.DateOffset, type(None)))
+             or isinstance(window_size, (int, np.integer))
+             and window_size < 1
+         ):
+             raise ValueError(
+                 f"`window_size` must be an integer greater than 0. Got {window_size}."
+             )
+
+         if differentiation is not None:
+             if (
+                 not isinstance(differentiation, (int, np.integer))
+                 or differentiation < 0
+             ):
+                 raise ValueError(
+                     f"`differentiation` must be None or an integer greater than or "
+                     f"equal to 0. Got {differentiation}."
+                 )
+
+         if not isinstance(return_all_indexes, bool):
+             raise TypeError(
+                 f"`return_all_indexes` must be a boolean: `True`, `False`. "
+                 f"Got {return_all_indexes}."
+             )
+
+         if not isinstance(verbose, bool):
+             raise TypeError(
+                 f"`verbose` must be a boolean: `True`, `False`. Got {verbose}."
+             )
+
+     def _extract_index(
+         self,
+         X: pd.Series | pd.DataFrame | pd.Index | dict[str, pd.Series | pd.DataFrame],
+     ) -> pd.Index:
+         """
+         Extracts and returns the index from the input data X.
+
+         Args:
+             X (pd.Series | pd.DataFrame | pd.Index | dict): Time series data or
+                 index to split.
+
+         Returns:
+             pd.Index: Index extracted from the input data.
+         """
+
+         if isinstance(X, (pd.Series, pd.DataFrame)):
+             idx = X.index
+         elif isinstance(X, dict):
+             indexes_freq = set()
+             not_valid_index = []
+             min_index = []
+             max_index = []
+             for k, v in X.items():
+                 if v is None:
+                     continue
+
+                 idx = v.index
+                 if isinstance(idx, pd.DatetimeIndex):
+                     indexes_freq.add(idx.freq)
+                 elif isinstance(idx, pd.RangeIndex):
+                     indexes_freq.add(idx.step)
+                 else:
+                     not_valid_index.append(k)
+
+                 min_index.append(idx[0])
+                 max_index.append(idx[-1])
+
+             if not_valid_index:
+                 raise TypeError(
+                     f"If `X` is a dictionary, all series must have a Pandas "
+                     f"RangeIndex or DatetimeIndex with the same step/frequency. "
+                     f"Review series: {not_valid_index}"
+                 )
+
+             if None in indexes_freq:
+                 raise ValueError(
+                     "If `X` is a dictionary, all series must have a Pandas "
+                     "RangeIndex or DatetimeIndex with the same step/frequency. "
+                     "Found series with no frequency or step."
+                 )
+             if not len(indexes_freq) == 1:
+                 raise ValueError(
+                     f"If `X` is a dictionary, all series must have a Pandas "
+                     f"RangeIndex or DatetimeIndex with the same step/frequency. "
+                     f"Found frequencies: {sorted(indexes_freq)}"
+                 )
+
+             if isinstance(idx, pd.DatetimeIndex):
+                 idx = pd.date_range(
+                     start=min(min_index), end=max(max_index), freq=indexes_freq.pop()
+                 )
+             else:
+                 idx = pd.RangeIndex(
+                     start=min(min_index),
+                     stop=max(max_index) + 1,
+                     step=indexes_freq.pop(),
+                 )
+         else:
+             idx = X
+
+         return idx
+
+     def set_params(self, params: dict) -> None:
+         """
+         Set the parameters of the Fold object. Before overwriting the current
+         parameters, the input parameters are validated to ensure correctness.
+
+         Args:
+             params (dict): Dictionary with the parameters to set.
+         """
+
+         if not isinstance(params, dict):
+             raise TypeError(f"`params` must be a dictionary. Got {type(params)}.")
+
+         current_params = dict(vars(self))
+         unknown_params = set(params.keys()) - set(current_params.keys())
+         if unknown_params:
+             warnings.warn(
+                 f"Unknown parameters: {unknown_params}. They have been ignored.",
+                 IgnoredArgumentWarning,
+             )
+
+         filtered_params = {k: v for k, v in params.items() if k in current_params}
+         updated_params = {
+             "cv_name": type(self).__name__,
+             **current_params,
+             **filtered_params,
+         }
+
+         self._validate_params(**updated_params)
+         for key, value in updated_params.items():
+             setattr(self, key, value)
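
The `set_params` flow above validates the merged parameter set before overwriting anything, and drops unknown keys with an `IgnoredArgumentWarning`. The following is a minimal usage sketch, not part of the packaged code, assuming the wheel is installed and that `OneStepAheadFold` (defined in split_one_step.py below) is re-exported from `spotforecast2.model_selection`; the expected warning behaviour is read off the source above.

import warnings
from spotforecast2.model_selection import OneStepAheadFold  # assumed re-export

cv = OneStepAheadFold(initial_train_size=100, window_size=24, verbose=False)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # "not_a_param" is not an attribute of the fold object, so it should be
    # dropped and reported via IgnoredArgumentWarning; "verbose" is applied.
    cv.set_params({"verbose": True, "not_a_param": 123})

print(cv.verbose)                                # expected: True
print([w.category.__name__ for w in caught])     # expected: ['IgnoredArgumentWarning']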
spotforecast2/model_selection/split_one_step.py
@@ -0,0 +1,245 @@
+ """
+ One step ahead cross-validation splitting.
+ """
+
+ from __future__ import annotations
+ from typing import Any
+ import itertools
+ import pandas as pd
+
+ from spotforecast2.forecaster.utils import date_to_index_position, get_style_repr_html
+ from .split_base import BaseFold
+
+
+ class OneStepAheadFold(BaseFold):
+     """
+     Class to split time series data into train and test folds for one-step-ahead
+     forecasting.
+
+     Args:
+         initial_train_size (int | str | pd.Timestamp): Number of observations used
+             for initial training.
+
+             - If an integer, the number of observations used for initial training.
+             - If a date string or pandas Timestamp, it is the last date included in
+               the initial training set.
+         window_size (int, optional): Number of observations needed to generate the
+             autoregressive predictors. Defaults to None.
+         differentiation (int, optional): Number of observations to use for differentiation.
+             This is used to extend the `last_window` by as many observations as the
+             differentiation order. Defaults to None.
+         return_all_indexes (bool, optional): Whether to return all indexes or only the
+             start and end indexes of each fold. Defaults to False.
+         verbose (bool, optional): Whether to print information about generated folds.
+             Defaults to True.
+
+     Attributes:
+         initial_train_size (int): Number of observations used for initial training.
+         window_size (int): Number of observations needed to generate the
+             autoregressive predictors.
+         differentiation (int): Number of observations to use for differentiation.
+             This is used to extend the `last_window` by as many observations as the
+             differentiation order.
+         return_all_indexes (bool): Whether to return all indexes or only the start
+             and end indexes of each fold.
+         verbose (bool): Whether to print information about generated folds.
+     """
+
+     def __init__(
+         self,
+         initial_train_size: int | str | pd.Timestamp,
+         window_size: int | None = None,
+         differentiation: int | None = None,
+         return_all_indexes: bool = False,
+         verbose: bool = True,
+     ) -> None:
+
+         super().__init__(
+             initial_train_size=initial_train_size,
+             window_size=window_size,
+             differentiation=differentiation,
+             return_all_indexes=return_all_indexes,
+             verbose=verbose,
+         )
+
+     def __repr__(self) -> str:
+         """
+         Information displayed when printed.
+         """
+
+         info = (
+             f"{'=' * len(type(self).__name__)} \n"
+             f"{type(self).__name__} \n"
+             f"{'=' * len(type(self).__name__)} \n"
+             f"Initial train size = {self.initial_train_size},\n"
+             f"Window size = {self.window_size},\n"
+             f"Differentiation = {self.differentiation},\n"
+             f"Return all indexes = {self.return_all_indexes},\n"
+             f"Verbose = {self.verbose}\n"
+         )
+
+         return info
+
+     def _repr_html_(self) -> str:
+         """
+         HTML representation of the object.
+         The "General Information" section is expanded by default.
+         """
+
+         style, unique_id = get_style_repr_html()
+         content = f"""
+         <div class="container-{unique_id}">
+             <p style="font-size: 1.5em; font-weight: bold; margin-block-start: 0.83em; margin-block-end: 0.83em;">{type(self).__name__}</p>
+             <details open>
+                 <summary>General Information</summary>
+                 <ul>
+                     <li><strong>Initial train size:</strong> {self.initial_train_size}</li>
+                     <li><strong>Window size:</strong> {self.window_size}</li>
+                     <li><strong>Differentiation:</strong> {self.differentiation}</li>
+                     <li><strong>Return all indexes:</strong> {self.return_all_indexes}</li>
+                 </ul>
+             </details>
+         </div>
+         """
+
+         return style + content
+
+     def split(
+         self,
+         X: pd.Series | pd.DataFrame | pd.Index | dict[str, pd.Series | pd.DataFrame],
+         as_pandas: bool = False,
+         externally_fitted: Any = None,
+     ) -> list | pd.DataFrame:
+         """
+         Split the time series data into train and test folds.
+
+         Args:
+             X (pd.Series | pd.DataFrame | pd.Index | dict): Time series data or index to split.
+             as_pandas (bool, optional): If True, the folds are returned as a DataFrame.
+                 This is useful to visualize the folds in a more interpretable way.
+                 Defaults to False.
+             externally_fitted (Any, optional): This argument is not used in this class.
+                 It is included for API consistency. Defaults to None.
+
+         Returns:
+             list | pd.DataFrame: A list containing the positions (indices) that define
+                 the fold, with the following elements:
+
+                 - fold: fold number.
+                 - [train_start, train_end]: list with the start and end positions of the
+                   training set.
+                 - [test_start, test_end]: list with the start and end positions of the test
+                   set. These are the observations used to evaluate the forecaster.
+                 - fit_forecaster: boolean indicating whether the forecaster should be fitted
+                   in this fold.
+
+                 It is important to note that the returned values are the positions of the
+                 observations and not the actual values of the index, so they can be used to
+                 slice the data directly using iloc.
+
+                 If `as_pandas` is `True`, the folds are returned as a DataFrame with the
+                 following columns: 'fold', 'train_start', 'train_end', 'test_start',
+                 'test_end', 'fit_forecaster'.
+
+                 Following the Python convention, the start index is inclusive and the end
+                 index is exclusive. This means that the last index is not included in the
+                 slice.
+         """
+
+         if not isinstance(X, (pd.Series, pd.DataFrame, pd.Index, dict)):
+             raise TypeError(
+                 f"X must be a pandas Series, DataFrame, Index or a dictionary. "
+                 f"Got {type(X)}."
+             )
+
+         index = self._extract_index(X)
+
+         self.initial_train_size = date_to_index_position(
+             index=index,
+             date_input=self.initial_train_size,
+             method="validation",
+             date_literal="initial_train_size",
+         )
+
+         fold = [
+             0,
+             [0, self.initial_train_size - 1],
+             [self.initial_train_size, len(X)],
+             True,
+         ]
+
+         if self.verbose:
+             self._print_info(index=index, fold=fold)
+
+         # NOTE: +1 to prevent iloc pandas from deleting the last observation
+         if self.return_all_indexes:
+             fold = [
+                 fold[0],
+                 [range(fold[1][0], fold[1][1] + 1)],
+                 [range(fold[2][0], fold[2][1])],
+                 fold[3],
+             ]
+         else:
+             fold = [
+                 fold[0],
+                 [fold[1][0], fold[1][1] + 1],
+                 [fold[2][0], fold[2][1]],
+                 fold[3],
+             ]
+
+         if as_pandas:
+             if not self.return_all_indexes:
+                 fold = pd.DataFrame(
+                     data=[[fold[0]] + list(itertools.chain(*fold[1:-1])) + [fold[-1]]],
+                     columns=[
+                         "fold",
+                         "train_start",
+                         "train_end",
+                         "test_start",
+                         "test_end",
+                         "fit_forecaster",
+                     ],
+                 )
+             else:
+                 fold = pd.DataFrame(
+                     data=[fold],
+                     columns=["fold", "train_index", "test_index", "fit_forecaster"],
+                 )
+
+         return fold
+
+     def _print_info(self, index: pd.Index, fold: list[list[int]]) -> None:
+         """
+         Print information about folds.
+
+         Args:
+             index (pd.Index): Index of the time series data.
+             fold (list): A list of lists containing the indices (position) of the fold.
+         """
+
+         if self.differentiation is None:
+             differentiation = 0
+         else:
+             differentiation = self.differentiation
+
+         initial_train_size = self.initial_train_size - differentiation
+         test_length = len(index) - (initial_train_size + differentiation)
+
+         print("Information of folds")
+         print("--------------------")
+         print(f"Number of observations in train: {initial_train_size}")
+         if self.differentiation is not None:
+             print(
+                 f" First {differentiation} observation/s in training set "
+                 f"are used for differentiation"
+             )
+         print(f"Number of observations in test: {test_length}")
+
+         training_start = index[fold[1][0] + differentiation]
+         training_end = index[fold[1][-1]]
+         test_start = index[fold[2][0]]
+         test_end = index[fold[2][-1] - 1]
+
+         print(f"Training : {training_start} -- {training_end} (n={initial_train_size})")
+         print(f"Test : {test_start} -- {test_end} (n={test_length})")
+         print("")
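
To make the return format of `split` concrete, here is a small illustrative sketch, not part of the packaged code, again assuming `OneStepAheadFold` is re-exported from `spotforecast2.model_selection` and that `date_to_index_position` passes integer inputs through unchanged. The expected values follow from the fold construction above, where the train end position is shifted by +1 so the positions slice cleanly with `iloc`.

import pandas as pd
from spotforecast2.model_selection import OneStepAheadFold  # assumed re-export

# 10 hourly observations; the first 7 form the initial training set.
y = pd.Series(
    range(10),
    index=pd.date_range("2024-01-01", periods=10, freq="h"),
)

cv = OneStepAheadFold(initial_train_size=7, window_size=3, verbose=False)

fold = cv.split(X=y)
print(fold)
# expected: [0, [0, 7], [7, 10], True]
#   -> train positions y.iloc[0:7], test positions y.iloc[7:10]

folds_df = cv.split(X=y, as_pandas=True)
print(folds_df.columns.tolist())
# expected: ['fold', 'train_start', 'train_end', 'test_start', 'test_end', 'fit_forecaster']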