spotforecast2 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. spotforecast2/.DS_Store +0 -0
  2. spotforecast2/__init__.py +2 -0
  3. spotforecast2/data/__init__.py +0 -0
  4. spotforecast2/data/data.py +130 -0
  5. spotforecast2/data/fetch_data.py +209 -0
  6. spotforecast2/exceptions.py +681 -0
  7. spotforecast2/forecaster/.DS_Store +0 -0
  8. spotforecast2/forecaster/__init__.py +7 -0
  9. spotforecast2/forecaster/base.py +448 -0
  10. spotforecast2/forecaster/metrics.py +527 -0
  11. spotforecast2/forecaster/recursive/__init__.py +4 -0
  12. spotforecast2/forecaster/recursive/_forecaster_equivalent_date.py +1075 -0
  13. spotforecast2/forecaster/recursive/_forecaster_recursive.py +939 -0
  14. spotforecast2/forecaster/recursive/_warnings.py +15 -0
  15. spotforecast2/forecaster/utils.py +954 -0
  16. spotforecast2/model_selection/__init__.py +5 -0
  17. spotforecast2/model_selection/bayesian_search.py +453 -0
  18. spotforecast2/model_selection/grid_search.py +314 -0
  19. spotforecast2/model_selection/random_search.py +151 -0
  20. spotforecast2/model_selection/split_base.py +357 -0
  21. spotforecast2/model_selection/split_one_step.py +245 -0
  22. spotforecast2/model_selection/split_ts_cv.py +634 -0
  23. spotforecast2/model_selection/utils_common.py +718 -0
  24. spotforecast2/model_selection/utils_metrics.py +103 -0
  25. spotforecast2/model_selection/validation.py +685 -0
  26. spotforecast2/preprocessing/__init__.py +30 -0
  27. spotforecast2/preprocessing/_binner.py +378 -0
  28. spotforecast2/preprocessing/_common.py +123 -0
  29. spotforecast2/preprocessing/_differentiator.py +123 -0
  30. spotforecast2/preprocessing/_rolling.py +136 -0
  31. spotforecast2/preprocessing/curate_data.py +254 -0
  32. spotforecast2/preprocessing/imputation.py +92 -0
  33. spotforecast2/preprocessing/outlier.py +114 -0
  34. spotforecast2/preprocessing/split.py +139 -0
  35. spotforecast2/py.typed +0 -0
  36. spotforecast2/utils/__init__.py +43 -0
  37. spotforecast2/utils/convert_to_utc.py +44 -0
  38. spotforecast2/utils/data_transform.py +208 -0
  39. spotforecast2/utils/forecaster_config.py +344 -0
  40. spotforecast2/utils/generate_holiday.py +70 -0
  41. spotforecast2/utils/validation.py +569 -0
  42. spotforecast2/weather/__init__.py +0 -0
  43. spotforecast2/weather/weather_client.py +288 -0
  44. spotforecast2-0.0.1.dist-info/METADATA +47 -0
  45. spotforecast2-0.0.1.dist-info/RECORD +46 -0
  46. spotforecast2-0.0.1.dist-info/WHEEL +4 -0
@@ -0,0 +1,634 @@
1
+ """
2
+ Time series cross-validation splitting.
3
+ """
4
+
5
+ from __future__ import annotations
6
+ import warnings
7
+ import itertools
8
+ import numpy as np
9
+ import pandas as pd
10
+
11
+ from spotforecast2.forecaster.utils import date_to_index_position, get_style_repr_html
12
+ from spotforecast2.exceptions import IgnoredArgumentWarning
13
+ from .split_base import BaseFold
14
+
15
+
16
class TimeSeriesFold(BaseFold):
    """
    Class to split time series data into train and test folds.
    When used within a backtesting or hyperparameter search, the arguments
    'initial_train_size', 'window_size' and 'differentiation' are not required
    as they are automatically set by the backtesting or hyperparameter search
    functions.

    Args:
        steps (int): Number of observations used to be predicted in each fold.
            This is also commonly referred to as the forecast horizon or test size.
        initial_train_size (int | str | pd.Timestamp, optional): Number of observations
            used for initial training.

            - If `None` or 0, the initial forecaster is not trained in the first fold.
            - If an integer, the number of observations used for initial training.
            - If a date string or pandas Timestamp, it is the last date included in
              the initial training set.
            Defaults to None.
        fold_stride (int, optional): Number of observations that the start of the test
            set advances between consecutive folds.

            - If `None`, it defaults to the same value as `steps`, meaning that folds
              are placed back-to-back without overlap.
            - If `fold_stride < steps`, test sets overlap and multiple forecasts will
              be generated for the same observations.
            - If `fold_stride > steps`, gaps are left between consecutive test sets.
            **New in version 0.18.0**
            Defaults to None.
        window_size (int, optional): Number of observations needed to generate the
            autoregressive predictors. Defaults to None.
        differentiation (int, optional): Number of observations to use for differentiation.
            This is used to extend the `last_window` as many observations as the
            differentiation order. Defaults to None.
        refit (bool | int, optional): Whether to refit the forecaster in each fold.

            - If `True`, the forecaster is refitted in each fold.
            - If `False`, the forecaster is trained only in the first fold.
            - If an integer, the forecaster is trained in the first fold and then refitted
              every `refit` folds.
            Defaults to False.
        fixed_train_size (bool, optional): Whether the training size is fixed or increases
            in each fold. Defaults to True.
        gap (int, optional): Number of observations between the end of the training set
            and the start of the test set. Defaults to 0.
        skip_folds (int | list, optional): Number of folds to skip.

            - If an integer, every 'skip_folds'-th is returned.
            - If a list, the indexes of the folds to skip.

            For example, if `skip_folds=3` and there are 10 folds, the returned folds are
            0, 3, 6, and 9. If `skip_folds=[1, 2, 3]`, the returned folds are 0, 4, 5, 6, 7,
            8, and 9. Defaults to None.
        allow_incomplete_fold (bool, optional): Whether to allow the last fold to include
            fewer observations than `steps`. If `False`, the last fold is excluded if it
            is incomplete. Defaults to True.
        return_all_indexes (bool, optional): Whether to return all indexes or only the
            start and end indexes of each fold. Defaults to False.
        verbose (bool, optional): Whether to print information about generated folds.
            Defaults to True.

    Attributes:
        steps (int): Number of observations used to be predicted in each fold.
        initial_train_size (int): Number of observations used for initial training.
            If `None` or 0, the initial forecaster is not trained in the first fold.
        fold_stride (int): Number of observations that the start of the test set
            advances between consecutive folds.
        overlapping_folds (bool): Whether the folds overlap.
        window_size (int): Number of observations needed to generate the
            autoregressive predictors.
        differentiation (int): Number of observations to use for differentiation.
            This is used to extend the `last_window` as many observations as the
            differentiation order.
        refit (bool | int): Whether to refit the forecaster in each fold.
        fixed_train_size (bool): Whether the training size is fixed or increases in each fold.
        gap (int): Number of observations between the end of the training set and the
            start of the test set.
        skip_folds (int | list): Number of folds to skip.
        allow_incomplete_fold (bool): Whether to allow the last fold to include fewer
            observations than `steps`.
        return_all_indexes (bool): Whether to return all indexes or only the start
            and end indexes of each fold.
        verbose (bool): Whether to print information about generated folds.

    Note:
        Returned values are the positions of the observations and not the actual values of
        the index, so they can be used to slice the data directly using iloc. For example,
        if the input series is `X = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]`, the
        `initial_train_size = 3`, `window_size = 2`, `steps = 4`, and `gap = 1`,
        the output of the first fold will be: [0, [0, 3], [1, 3], [3, 8], [4, 8], True].

        The first element is the fold number, the first list `[0, 3]` indicates that
        the training set goes from the first to the third observation. The second
        list `[1, 3]` indicates that the last window seen by the forecaster during
        training goes from the second to the third observation. The third list `[3, 8]`
        indicates that the test set goes from the fourth to the eighth observation.
        The fourth list `[4, 8]` indicates that the test set including the gap goes
        from the fifth to the eighth observation. The boolean `True` indicates that
        the forecaster should be trained in this fold.

        Following the python convention, the start index is inclusive and the end index is
        exclusive. This means that the last index is not included in the slice.

        As an example, with `initial_train_size=50`, `steps=30`, and `fold_stride=7`,
        the first test fold will cover observations [50, 80), the second fold [57, 87),
        and the third fold [64, 94). This configuration produces multiple forecasts
        for the same observations, which is often desirable in rolling-origin
        evaluation.
    """
125
+
126
+ def __init__(
127
+ self,
128
+ steps: int,
129
+ initial_train_size: int | str | pd.Timestamp | None = None,
130
+ fold_stride: int | None = None,
131
+ window_size: int | None = None,
132
+ differentiation: int | None = None,
133
+ refit: bool | int = False,
134
+ fixed_train_size: bool = True,
135
+ gap: int = 0,
136
+ skip_folds: int | list[int] | None = None,
137
+ allow_incomplete_fold: bool = True,
138
+ return_all_indexes: bool = False,
139
+ verbose: bool = True,
140
+ ) -> None:
141
+
142
+ super().__init__(
143
+ steps=steps,
144
+ initial_train_size=initial_train_size,
145
+ fold_stride=fold_stride,
146
+ window_size=window_size,
147
+ differentiation=differentiation,
148
+ refit=refit,
149
+ fixed_train_size=fixed_train_size,
150
+ gap=gap,
151
+ skip_folds=skip_folds,
152
+ allow_incomplete_fold=allow_incomplete_fold,
153
+ return_all_indexes=return_all_indexes,
154
+ verbose=verbose,
155
+ )
156
+
157
+ self.steps = steps
158
+ self.fold_stride = fold_stride if fold_stride is not None else steps
159
+ self.overlapping_folds = self.fold_stride < self.steps
160
+ self.refit = refit
161
+ self.fixed_train_size = fixed_train_size
162
+ self.gap = gap
163
+ self.skip_folds = skip_folds
164
+ self.allow_incomplete_fold = allow_incomplete_fold
165
+
166
+ def __repr__(self) -> str:
167
+ """
168
+ Information displayed when printed.
169
+ """
170
+
171
+ info = (
172
+ f"{'=' * len(type(self).__name__)} \n"
173
+ f"{type(self).__name__} \n"
174
+ f"{'=' * len(type(self).__name__)} \n"
175
+ f"Initial train size = {self.initial_train_size},\n"
176
+ f"Steps = {self.steps},\n"
177
+ f"Fold stride = {self.fold_stride},\n"
178
+ f"Overlapping folds = {self.overlapping_folds},\n"
179
+ f"Window size = {self.window_size},\n"
180
+ f"Differentiation = {self.differentiation},\n"
181
+ f"Refit = {self.refit},\n"
182
+ f"Fixed train size = {self.fixed_train_size},\n"
183
+ f"Gap = {self.gap},\n"
184
+ f"Skip folds = {self.skip_folds},\n"
185
+ f"Allow incomplete fold = {self.allow_incomplete_fold},\n"
186
+ f"Return all indexes = {self.return_all_indexes},\n"
187
+ f"Verbose = {self.verbose}\n"
188
+ )
189
+
190
+ return info
191
+
192
    def _repr_html_(self) -> None:
        """
        HTML representation of the object, rendered by Jupyter-style frontends.
        The "General Information" section is expanded by default.
        """

        # Project helper returns shared CSS plus a unique id used to scope it.
        style, unique_id = get_style_repr_html()
        content = f"""
        <div class="container-{unique_id}">
            <p style="font-size: 1.5em; font-weight: bold; margin-block-start: 0.83em; margin-block-end: 0.83em;">{type(self).__name__}</p>
            <details open>
                <summary>General Information</summary>
                <ul>
                    <li><strong>Initial train size:</strong> {self.initial_train_size}</li>
                    <li><strong>Steps:</strong> {self.steps}</li>
                    <li><strong>Fold stride:</strong> {self.fold_stride}</li>
                    <li><strong>Overlapping folds:</strong> {self.overlapping_folds}</li>
                    <li><strong>Window size:</strong> {self.window_size}</li>
                    <li><strong>Differentiation:</strong> {self.differentiation}</li>
                    <li><strong>Refit:</strong> {self.refit}</li>
                    <li><strong>Fixed train size:</strong> {self.fixed_train_size}</li>
                    <li><strong>Gap:</strong> {self.gap}</li>
                    <li><strong>Skip folds:</strong> {self.skip_folds}</li>
                    <li><strong>Allow incomplete fold:</strong> {self.allow_incomplete_fold}</li>
                    <li><strong>Return all indexes:</strong> {self.return_all_indexes}</li>
                </ul>
            </details>
        </div>
        """

        # Style sheet first so the scoped rules apply to the content below.
        return style + content
223
+
224
    def split(
        self,
        X: pd.Series | pd.DataFrame | pd.Index | dict[str, pd.Series | pd.DataFrame],
        as_pandas: bool = False,
    ) -> list | pd.DataFrame:
        """
        Split the time series data into train and test folds.

        Args:
            X (pd.Series | pd.DataFrame | pd.Index | dict): Time series data or index to split.
            as_pandas (bool, optional): If True, the folds are returned as a DataFrame.
                This is useful to visualize the folds in a more interpretable way.
                Defaults to False.

        Returns:
            list | pd.DataFrame: A list of lists containing the indices (position) for
            each fold. Each list contains 4 lists and a boolean with the following
            information:

            - fold: fold number.
            - [train_start, train_end]: list with the start and end positions of the
              training set.
            - [last_window_start, last_window_end]: list with the start and end positions
              of the last window seen by the forecaster during training. The last window
              is used to generate the lags use as predictors. If `differentiation` is
              included, the interval is extended as many observations as the
              differentiation order. If the argument `window_size` is `None`, this list is
              empty.
            - [test_start, test_end]: list with the start and end positions of the test
              set. These are the observations used to evaluate the forecaster.
            - [test_start_with_gap, test_end_with_gap]: list with the start and end
              positions of the test set including the gap. The gap is the number of
              observations between the end of the training set and the start of the test
              set.
            - fit_forecaster: boolean indicating whether the forecaster should be fitted
              in this fold.

            It is important to note that the returned values are the positions of the
            observations and not the actual values of the index, so they can be used to
            slice the data directly using iloc.

            If `as_pandas` is `True`, the folds are returned as a DataFrame with the
            following columns: 'fold', 'train_start', 'train_end', 'last_window_start',
            'last_window_end', 'test_start', 'test_end', 'test_start_with_gap',
            'test_end_with_gap', 'fit_forecaster'.

            Following the python convention, the start index is inclusive and the end
            index is exclusive. This means that the last index is not included in the
            slice.
        """

        if not isinstance(X, (pd.Series, pd.DataFrame, pd.Index, dict)):
            raise TypeError(
                f"X must be a pandas Series, DataFrame, Index or a dictionary. "
                f"Got {type(X)}."
            )

        # NOTE(review): this method mutates instance state (`window_size`,
        # `initial_train_size`, `refit`) as a side effect — confirm callers
        # expect that before reusing the same instance for a second split.
        window_size_as_date_offset = isinstance(
            self.window_size, pd.tseries.offsets.DateOffset
        )
        if window_size_as_date_offset:
            # Calculate the window_size in steps. This is not an exact calculation
            # because the offset follows the calendar rules and the distance between
            # two dates may not be constant.
            first_valid_index = X.index[-1] - self.window_size
            try:
                window_size_idx_start = X.index.get_loc(first_valid_index)
                window_size_idx_end = X.index.get_loc(X.index[-1])
                self.window_size = window_size_idx_end - window_size_idx_start
            except KeyError:
                # The offset points before the first available observation.
                raise ValueError(
                    f"The length of `y` ({len(X)}), must be greater than or equal "
                    f"to the window size ({self.window_size}). This is because "
                    f"the offset (forecaster.offset) is larger than the available "
                    f"data. Try to decrease the size of the offset (forecaster.offset), "
                    f"the number of `n_offsets` (forecaster.n_offsets) or increase the "
                    f"size of `y`."
                )

        if self.initial_train_size is None:
            # No initial training: an externally fitted forecaster is assumed,
            # but `window_size` observations are still needed for predictors.
            if self.window_size is None:
                raise ValueError(
                    "To use split method when `initial_train_size` is None, "
                    "`window_size` must be an integer greater than 0. "
                    "Although no initial training is done and all data is used to "
                    "evaluate the model, the first `window_size` observations are "
                    "needed to create the initial predictors. Got `window_size` = None."
                )
            if self.refit:
                raise ValueError(
                    "`refit` is only allowed when `initial_train_size` is not `None`. "
                    "Set `refit` to `False` if you want to use `initial_train_size = None`."
                )
            externally_fitted = True
            self.initial_train_size = self.window_size  # Reset to None later
        else:
            if self.window_size is None:
                warnings.warn(
                    "Last window cannot be calculated because `window_size` is None.",
                    IgnoredArgumentWarning,
                )
            externally_fitted = False

        index = self._extract_index(X)
        # `idx` is a range over positions; slicing it yields position sub-ranges.
        idx = range(len(index))
        folds = []
        i = 0

        # Resolve a date/Timestamp `initial_train_size` into an integer position.
        self.initial_train_size = date_to_index_position(
            index=index,
            date_input=self.initial_train_size,
            method="validation",
            date_literal="initial_train_size",
        )

        if window_size_as_date_offset:
            if self.initial_train_size is not None:
                if self.initial_train_size < self.window_size:
                    raise ValueError(
                        f"If `initial_train_size` is an integer, it must be greater than "
                        f"the `window_size` of the forecaster ({self.window_size}) "
                        f"and smaller than the length of the series ({len(X)}). If "
                        f"it is a date, it must be within this range of the index."
                    )

        if self.allow_incomplete_fold:
            # At least one observation after the gap to allow incomplete fold
            if len(index) <= self.initial_train_size + self.gap:
                raise ValueError(
                    f"The time series must have more than `initial_train_size + gap` "
                    f"observations to create at least one fold.\n"
                    f"    Time series length: {len(index)}\n"
                    f"    Required > {self.initial_train_size + self.gap}\n"
                    f"    initial_train_size: {self.initial_train_size}\n"
                    f"    gap: {self.gap}\n"
                )
        else:
            # At least one complete fold
            if len(index) < self.initial_train_size + self.gap + self.steps:
                raise ValueError(
                    f"The time series must have at least `initial_train_size + gap + steps` "
                    f"observations to create a minimum of one complete fold "
                    f"(allow_incomplete_fold=False).\n"
                    f"    Time series length: {len(index)}\n"
                    f"    Required >= {self.initial_train_size + self.gap + self.steps}\n"
                    f"    initial_train_size: {self.initial_train_size}\n"
                    f"    gap: {self.gap}\n"
                    f"    steps: {self.steps}\n"
                )

        # Generate folds while at least one test observation remains after the gap.
        while self.initial_train_size + (i * self.fold_stride) + self.gap < len(index):

            if self.refit:
                # NOTE: If `fixed_train_size` the train size doesn't increase but
                # moves by `fold_stride` positions in each iteration. If `False`,
                # the train size increases by `fold_stride` in each iteration.
                train_iloc_start = (
                    i * (self.fold_stride) if self.fixed_train_size else 0
                )
                train_iloc_end = self.initial_train_size + i * (self.fold_stride)
                test_iloc_start = train_iloc_end
            else:
                # NOTE: The train size doesn't increase and doesn't move.
                train_iloc_start = 0
                train_iloc_end = self.initial_train_size
                test_iloc_start = self.initial_train_size + i * (self.fold_stride)

            if self.window_size is not None:
                # Last window ends right where the test set starts.
                last_window_iloc_start = test_iloc_start - self.window_size

            test_iloc_end = test_iloc_start + self.gap + self.steps

            # Partitions as range objects; collapsed to [start, end] pairs later.
            partitions = [
                idx[train_iloc_start:train_iloc_end],
                (
                    idx[last_window_iloc_start:test_iloc_start]
                    if self.window_size is not None
                    else []
                ),
                idx[test_iloc_start:test_iloc_end],
                idx[test_iloc_start + self.gap : test_iloc_end],
            ]
            folds.append(partitions)
            i += 1

        # NOTE: Delete all incomplete folds at the end if not allowed
        n_removed_folds = 0
        if not self.allow_incomplete_fold:
            # NOTE: While folds and the last "test_index_with_gap" is incomplete,
            # calculating len of range objects
            while folds and len(folds[-1][3]) < self.steps:
                folds.pop()
                n_removed_folds += 1

        # Replace partitions inside folds with length 0 with `None`
        folds = [
            [partition if len(partition) > 0 else None for partition in fold]
            for fold in folds
        ]

        # Create a flag to know whether to train the forecaster
        if self.refit == 0:
            # Normalize integer 0 to the boolean False.
            self.refit = False

        if isinstance(self.refit, bool):
            fit_forecaster = [self.refit] * len(folds)
            fit_forecaster[0] = True
        else:
            # Integer refit: train on fold 0 and every `refit`-th fold after.
            fit_forecaster = [False] * len(folds)
            for i in range(0, len(fit_forecaster), self.refit):
                fit_forecaster[i] = True

        for i in range(len(folds)):
            folds[i].insert(0, i)
            folds[i].append(fit_forecaster[i])
            if fit_forecaster[i] is False:
                # No refit: reuse the training partition of the previous fold.
                folds[i][1] = folds[i - 1][1]

        index_to_skip = []
        if self.skip_folds is not None:
            if isinstance(self.skip_folds, (int, np.integer)) and self.skip_folds > 0:
                # Keep every `skip_folds`-th fold; skip the rest.
                index_to_keep = np.arange(0, len(folds), self.skip_folds)
                index_to_skip = np.setdiff1d(
                    np.arange(0, len(folds)), index_to_keep, assume_unique=True
                )
                index_to_skip = [
                    int(x) for x in index_to_skip
                ]  # Required since numpy 2.0
            if isinstance(self.skip_folds, list):
                index_to_skip = [i for i in self.skip_folds if i < len(folds)]

        if self.verbose:
            self._print_info(
                index=index,
                folds=folds,
                externally_fitted=externally_fitted,
                n_removed_folds=n_removed_folds,
                index_to_skip=index_to_skip,
            )

        folds = [fold for i, fold in enumerate(folds) if i not in index_to_skip]
        if not self.return_all_indexes:
            # NOTE: +1 to prevent iloc pandas from deleting the last observation
            folds = [
                [
                    fold[0],
                    [fold[1][0], fold[1][-1] + 1],
                    (
                        [fold[2][0], fold[2][-1] + 1]
                        if self.window_size is not None
                        else []
                    ),
                    [fold[3][0], fold[3][-1] + 1],
                    [fold[4][0], fold[4][-1] + 1],
                    fold[5],
                ]
                for fold in folds
            ]

        if externally_fitted:
            # Restore the sentinel value and mark fold 0 as not requiring a fit.
            self.initial_train_size = None
            folds[0][5] = False

        if as_pandas:
            if self.window_size is None:
                for fold in folds:
                    fold[2] = [None, None]

            if not self.return_all_indexes:
                folds = pd.DataFrame(
                    data=[
                        [fold[0]] + list(itertools.chain(*fold[1:-1])) + [fold[-1]]
                        for fold in folds
                    ],
                    columns=[
                        "fold",
                        "train_start",
                        "train_end",
                        "last_window_start",
                        "last_window_end",
                        "test_start",
                        "test_end",
                        "test_start_with_gap",
                        "test_end_with_gap",
                        "fit_forecaster",
                    ],
                )
            else:
                folds = pd.DataFrame(
                    data=folds,
                    columns=[
                        "fold",
                        "train_index",
                        "last_window_index",
                        "test_index",
                        "test_index_with_gap",
                        "fit_forecaster",
                    ],
                )

        return folds
525
+
526
    def _print_info(
        self,
        index: pd.Index,
        folds: list[list[int]],
        externally_fitted: bool,
        n_removed_folds: int,
        index_to_skip: list[int],
    ) -> None:
        """
        Print information about folds.

        Args:
            index (pd.Index): Index of the time series data.
            folds (list): A list of lists containing the indices (position) for each fold.
            externally_fitted (bool): Whether an already trained forecaster is to be used.
            n_removed_folds (int): Number of folds removed.
            index_to_skip (list): Number of folds skipped.
        """

        print("Information of folds")
        print("--------------------")
        if externally_fitted:
            print(
                f"An already trained forecaster is to be used. Window size: "
                f"{self.window_size}"
            )
        else:
            if self.differentiation is None:
                print(
                    f"Number of observations used for initial training: "
                    f"{self.initial_train_size}"
                )
            else:
                # The first `differentiation` observations are consumed by
                # differencing, so they are reported separately.
                print(
                    f"Number of observations used for initial training: "
                    f"{self.initial_train_size - self.differentiation}"
                )
                print(
                    f"    First {self.differentiation} observation/s in training sets "
                    f"are used for differentiation"
                )
            print(
                f"Number of observations used for backtesting: "
                f"{len(index) - self.initial_train_size}"
            )
            print(f"    Number of folds: {len(folds)}")
            print(
                f"    Number skipped folds: "
                f"{len(index_to_skip)} {index_to_skip if index_to_skip else ''}"
            )
            print(f"    Number of steps per fold: {self.steps}")
            if self.steps != self.fold_stride:
                print(
                    f"    Number of steps to the next fold (fold stride): {self.fold_stride}"
                )
            print(
                f"    Number of steps to exclude between last observed data "
                f"(last window) and predictions (gap): {self.gap}"
            )
            if n_removed_folds > 0:
                print(
                    f"    The last {n_removed_folds} fold(s) have been excluded "
                    f"because they were incomplete."
                )

            # fold[4] is the test-with-gap partition (a range at this point).
            if len(folds[-1][4]) < self.steps:
                print(f"    Last fold only includes {len(folds[-1][4])} observations.")

        print("")

        if self.differentiation is None:
            differentiation = 0
        else:
            differentiation = self.differentiation

        for i, fold in enumerate(folds):
            is_fold_skipped = i in index_to_skip
            # fold[-1] is the fit flag; fold 0 always trains.
            has_training = fold[-1] if i != 0 else True
            training_start = (
                index[fold[1][0] + differentiation] if fold[1] is not None else None
            )
            training_end = index[fold[1][-1]] if fold[1] is not None else None
            training_length = (
                len(fold[1]) - differentiation if fold[1] is not None else 0
            )
            validation_start = index[fold[4][0]]
            validation_end = index[fold[4][-1]]
            validation_length = len(fold[4])

            print(f"Fold: {i}")
            if is_fold_skipped:
                print("    Fold skipped")
            elif not externally_fitted and has_training:
                print(
                    f"    Training:   {training_start} -- {training_end} "
                    f"(n={training_length})"
                )
                print(
                    f"    Validation: {validation_start} -- {validation_end} "
                    f"(n={validation_length})"
                )
            else:
                print("    Training:   No training in this fold")
                print(
                    f"    Validation: {validation_start} -- {validation_end} "
                    f"(n={validation_length})"
                )

        print("")