spotforecast2-safe 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,687 +0,0 @@
1
- """
2
- Time series cross-validation splitting.
3
- """
4
-
5
- from __future__ import annotations
6
- import warnings
7
- import itertools
8
- import numpy as np
9
- import pandas as pd
10
-
11
- from spotforecast2_safe.forecaster.utils import (
12
- date_to_index_position,
13
- get_style_repr_html,
14
- )
15
- from spotforecast2_safe.exceptions import IgnoredArgumentWarning
16
- from .split_base import BaseFold
17
-
18
-
19
- class TimeSeriesFold(BaseFold):
20
- """Class to split time series data into train and test folds.
21
-
22
- When used within a backtesting or hyperparameter search, the arguments
23
- 'initial_train_size', 'window_size' and 'differentiation' are not required
24
- as they are automatically set by the backtesting or hyperparameter search
25
- functions.
26
-
27
- Args:
28
- steps: Number of observations used to be predicted in each fold.
29
- This is also commonly referred to as the forecast horizon or test size.
30
- initial_train_size: Number of observations used for initial training.
31
-
32
- - If `None` or 0, the initial forecaster is not trained in the first fold.
33
- - If an integer, the number of observations used for initial training.
34
- - If a date string or pandas Timestamp, it is the last date included in
35
- the initial training set.
36
-
37
- Defaults to None.
38
- fold_stride: Number of observations that the start of the test set
39
- advances between consecutive folds.
40
-
41
- - If `None`, it defaults to the same value as `steps`, meaning that folds
42
- are placed back-to-back without overlap.
43
- - If `fold_stride < steps`, test sets overlap and multiple forecasts will
44
- be generated for the same observations.
45
- - If `fold_stride > steps`, gaps are left between consecutive test sets.
46
-
47
- Defaults to None.
48
- window_size: Number of observations needed to generate the
49
- autoregressive predictors. Defaults to None.
50
- differentiation: Number of observations to use for differentiation.
51
- This is used to extend the `last_window` as many observations as the
52
- differentiation order. Defaults to None.
53
- refit: Whether to refit the forecaster in each fold.
54
-
55
- - If `True`, the forecaster is refitted in each fold.
56
- - If `False`, the forecaster is trained only in the first fold.
57
- - If an integer, the forecaster is trained in the first fold and then refitted
58
- every `refit` folds.
59
-
60
- Defaults to False.
61
- fixed_train_size: Whether the training size is fixed or increases
62
- in each fold. Defaults to True.
63
- gap: Number of observations between the end of the training set
64
- and the start of the test set. Defaults to 0.
65
- skip_folds: Number of folds to skip.
66
-
67
- - If an integer, every 'skip_folds'-th is returned.
68
- - If a list, the indexes of the folds to skip.
69
-
70
- For example, if `skip_folds=3` and there are 10 folds, the returned folds are
71
- 0, 3, 6, and 9. If `skip_folds=[1, 2, 3]`, the returned folds are 0, 4, 5, 6, 7,
72
- 8, and 9. Defaults to None.
73
- allow_incomplete_fold: Whether to allow the last fold to include
74
- fewer observations than `steps`. If `False`, the last fold is excluded if it
75
- is incomplete. Defaults to True.
76
- return_all_indexes: Whether to return all indexes or only the
77
- start and end indexes of each fold. Defaults to False.
78
- verbose: Whether to print information about generated folds.
79
- Defaults to True.
80
-
81
- Attributes:
82
- steps: Number of observations used to be predicted in each fold.
83
- initial_train_size: Number of observations used for initial training.
84
- If `None` or 0, the initial forecaster is not trained in the first fold.
85
- fold_stride: Number of observations that the start of the test set
86
- advances between consecutive folds.
87
- overlapping_folds: Whether the folds overlap.
88
- window_size: Number of observations needed to generate the
89
- autoregressive predictors.
90
- differentiation: Number of observations to use for differentiation.
91
- This is used to extend the `last_window` as many observations as the
92
- differentiation order.
93
- refit: Whether to refit the forecaster in each fold.
94
- fixed_train_size: Whether the training size is fixed or increases in each fold.
95
- gap: Number of observations between the end of the training set and the
96
- start of the test set.
97
- skip_folds: Number of folds to skip.
98
- allow_incomplete_fold: Whether to allow the last fold to include fewer
99
- observations than `steps`.
100
- return_all_indexes: Whether to return all indexes or only the start
101
- and end indexes of each fold.
102
- verbose: Whether to print information about generated folds.
103
-
104
- Examples:
105
- Basic usage with fixed train size:
106
- >>> import pandas as pd
107
- >>> import numpy as np
108
- >>> from spotforecast2.model_selection import TimeSeriesFold
109
- >>> # Create sample time series data
110
- >>> dates = pd.date_range('2020-01-01', periods=100, freq='D')
111
- >>> y = pd.Series(np.arange(100), index=dates)
112
- >>> # Create fold splitter
113
- >>> cv = TimeSeriesFold(
114
- ... steps=10,
115
- ... initial_train_size=50,
116
- ... refit=True,
117
- ... fixed_train_size=True
118
- ... )
119
- >>> # Get folds
120
- >>> folds = cv.split(y)
121
- >>> print(f"Number of folds: {len(folds)}")
122
- Number of folds: 4
123
-
124
- Overlapping folds with custom stride:
125
- >>> cv = TimeSeriesFold(
126
- ... steps=30,
127
- ... initial_train_size=50,
128
- ... fold_stride=7,
129
- ... fixed_train_size=False
130
- ... )
131
- >>> folds = cv.split(y)
132
- >>> # First test fold covers [50, 80), second [57, 87), etc.
133
-
134
- Return as pandas DataFrame:
135
- >>> cv = TimeSeriesFold(steps=10, initial_train_size=50)
136
- >>> folds_df = cv.split(y, as_pandas=True)
137
- >>> print(folds_df.columns.tolist())
138
- ['fold', 'train_start', 'train_end', 'last_window_start', 'last_window_end', 'test_start', 'test_end', 'test_start_with_gap', 'test_end_with_gap', 'fit_forecaster']
139
-
140
- Skip folds for faster evaluation:
141
- >>> cv = TimeSeriesFold(
142
- ... steps=5,
143
- ... initial_train_size=50,
144
- ... skip_folds=2
145
- ... )
146
- >>> folds = cv.split(y)
147
- >>> # Returns folds 0, 2, 4, 6, ...
148
-
149
- Note:
150
- Returned values are the positions of the observations and not the actual values of
151
- the index, so they can be used to slice the data directly using iloc. For example,
152
- if the input series is `X = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]`, the
153
- `initial_train_size = 3`, `window_size = 2`, `steps = 4`, and `gap = 1`,
154
- the output of the first fold will be: [0, [0, 3], [1, 3], [3, 8], [4, 8], True].
155
-
156
- The first element is the fold number, the first list `[0, 3]` indicates that
157
- the training set goes from the first to the third observation. The second
158
- list `[1, 3]` indicates that the last window seen by the forecaster during
159
- training goes from the second to the third observation. The third list `[3, 8]`
160
- indicates that the test set goes from the fourth to the eighth observation.
161
- The fourth list `[4, 8]` indicates that the test set including the gap goes
162
- from the fifth to the eighth observation. The boolean `True` indicates that
163
- the forecaster should be trained in this fold.
164
-
165
- Following the python convention, the start index is inclusive and the end index is
166
- exclusive. This means that the last index is not included in the slice.
167
-
168
- As an example, with `initial_train_size=50`, `steps=30`, and `fold_stride=7`,
169
- the first test fold will cover observations [50, 80), the second fold [57, 87),
170
- and the third fold [64, 94). This configuration produces multiple forecasts
171
- for the same observations, which is often desirable in rolling-origin
172
- evaluation.
173
- """
174
-
175
def __init__(
    self,
    steps: int,
    initial_train_size: int | str | pd.Timestamp | None = None,
    fold_stride: int | None = None,
    window_size: int | None = None,
    differentiation: int | None = None,
    refit: bool | int = False,
    fixed_train_size: bool = True,
    gap: int = 0,
    skip_folds: int | list[int] | None = None,
    allow_incomplete_fold: bool = True,
    return_all_indexes: bool = False,
    verbose: bool = True,
) -> None:
    """Create a fold splitter from the given configuration.

    Every argument is first forwarded, unchanged, to ``BaseFold.__init__``.
    The fold-related settings are then stored on the instance.

    ``initial_train_size``, ``window_size``, ``differentiation``,
    ``return_all_indexes`` and ``verbose`` are not assigned here although
    other methods read them from ``self``; they are presumably set by the
    base-class initialiser (not visible here -- confirm against BaseFold).

    Args:
        steps: Number of observations predicted in each fold.
        initial_train_size: Size (or last date) of the initial training set.
        fold_stride: Distance between the starts of consecutive test sets.
            `None` means "same as `steps`".
        window_size: Number of observations needed to build the predictors.
        differentiation: Number of extra observations kept for differencing.
        refit: Whether (or every how many folds) the forecaster is refitted.
        fixed_train_size: Whether the training window keeps a constant size.
        gap: Observations between the end of training and the test start.
        skip_folds: Folds to skip (step as an integer, or explicit list).
        allow_incomplete_fold: Whether a shorter last fold is kept.
        return_all_indexes: Whether full position ranges are returned.
        verbose: Whether fold information is printed.
    """
    # Built before `fold_stride` is defaulted so the base class receives
    # exactly what the caller passed (possibly None).
    base_kwargs = dict(
        steps=steps,
        initial_train_size=initial_train_size,
        fold_stride=fold_stride,
        window_size=window_size,
        differentiation=differentiation,
        refit=refit,
        fixed_train_size=fixed_train_size,
        gap=gap,
        skip_folds=skip_folds,
        allow_incomplete_fold=allow_incomplete_fold,
        return_all_indexes=return_all_indexes,
        verbose=verbose,
    )
    super().__init__(**base_kwargs)

    # A missing stride means each test set starts where the previous one ended.
    if fold_stride is None:
        fold_stride = steps

    self.steps = steps
    self.fold_stride = fold_stride
    # Test sets overlap as soon as the stride is shorter than the horizon.
    self.overlapping_folds = self.fold_stride < self.steps
    self.refit = refit
    self.fixed_train_size = fixed_train_size
    self.gap = gap
    self.skip_folds = skip_folds
    self.allow_incomplete_fold = allow_incomplete_fold
214
-
215
def __repr__(self) -> str:
    """Information displayed when printed.

    The text starts with the class name framed by two rules of ``=`` of the
    same length, followed by one ``label = value`` line per setting.

    Returns:
        String representation of the TimeSeriesFold object.
    """
    class_name = type(self).__name__
    rule = "=" * len(class_name)
    # Each header line ends with a space before the newline.
    header = f"{rule} \n{class_name} \n{rule} \n"

    settings = [
        ("Initial train size", self.initial_train_size),
        ("Steps", self.steps),
        ("Fold stride", self.fold_stride),
        ("Overlapping folds", self.overlapping_folds),
        ("Window size", self.window_size),
        ("Differentiation", self.differentiation),
        ("Refit", self.refit),
        ("Fixed train size", self.fixed_train_size),
        ("Gap", self.gap),
        ("Skip folds", self.skip_folds),
        ("Allow incomplete fold", self.allow_incomplete_fold),
        ("Return all indexes", self.return_all_indexes),
        ("Verbose", self.verbose),
    ]
    # Lines are separated by ",\n"; the last one ends with a bare newline.
    body = ",\n".join(f"{label} = {value}" for label, value in settings)

    return header + body + "\n"
242
-
243
def _repr_html_(self) -> str:
    """HTML representation of the object.

    The markup is the ``style`` string returned by ``get_style_repr_html``
    followed by a container holding a single "General Information" section,
    which is expanded by default (``<details open>``).

    Returns:
        HTML string representation for Jupyter notebooks.
    """
    style, unique_id = get_style_repr_html()
    class_name = type(self).__name__

    content = f"""
    <div class="container-{unique_id}">
        <p style="font-size: 1.5em; font-weight: bold; margin-block-start: 0.83em; margin-block-end: 0.83em;">{class_name}</p>
        <details open>
            <summary>General Information</summary>
            <ul>
                <li><strong>Initial train size:</strong> {self.initial_train_size}</li>
                <li><strong>Steps:</strong> {self.steps}</li>
                <li><strong>Fold stride:</strong> {self.fold_stride}</li>
                <li><strong>Overlapping folds:</strong> {self.overlapping_folds}</li>
                <li><strong>Window size:</strong> {self.window_size}</li>
                <li><strong>Differentiation:</strong> {self.differentiation}</li>
                <li><strong>Refit:</strong> {self.refit}</li>
                <li><strong>Fixed train size:</strong> {self.fixed_train_size}</li>
                <li><strong>Gap:</strong> {self.gap}</li>
                <li><strong>Skip folds:</strong> {self.skip_folds}</li>
                <li><strong>Allow incomplete fold:</strong> {self.allow_incomplete_fold}</li>
                <li><strong>Return all indexes:</strong> {self.return_all_indexes}</li>
            </ul>
        </details>
    </div>
    """

    return style + content
277
-
278
def split(
    self,
    X: pd.Series | pd.DataFrame | pd.Index | dict[str, pd.Series | pd.DataFrame],
    as_pandas: bool = False,
) -> list | pd.DataFrame:
    """Split the time series data into train and test folds.

    Args:
        X: Time series data or index to split. Can be a pandas Series, DataFrame,
            Index, or a dictionary of Series/DataFrames.
        as_pandas: If True, the folds are returned as a DataFrame. This is useful
            to visualize the folds in a more interpretable way. Defaults to False.

    Returns:
        A list of lists containing the indices (position) for each fold, or a
        DataFrame if `as_pandas=True`. Each list contains 4 lists and a boolean
        with the following information:

        - **fold**: fold number.
        - **[train_start, train_end]**: list with the start and end positions of
          the training set.
        - **[last_window_start, last_window_end]**: list with the start and end
          positions of the last window seen by the forecaster during training.
          The last window is used to generate the lags used as predictors. If
          `differentiation` is included, the interval is extended as many
          observations as the differentiation order. If the argument `window_size`
          is `None`, this list is empty.
        - **[test_start, test_end]**: list with the start and end positions of
          the test set. These are the observations used to evaluate the forecaster.
        - **[test_start_with_gap, test_end_with_gap]**: list with the start and
          end positions of the test set including the gap. The gap is the number
          of observations between the end of the training set and the start of
          the test set.
        - **fit_forecaster**: boolean indicating whether the forecaster should be
          fitted in this fold.

    Raises:
        TypeError: If `X` is not a pandas Series, DataFrame, Index or dict.
        ValueError: If `window_size` is a date offset reaching a label that is
            not in the index; if `initial_train_size` is None and either
            `window_size` is None or `refit` is truthy; if a date-offset
            window is larger than `initial_train_size`; or if the series is
            too short to build at least one fold.

    Note:
        The returned values are the positions of the observations and not the
        actual values of the index, so they can be used to slice the data directly
        using iloc.

        If `as_pandas` is `True`, the folds are returned as a DataFrame with the
        following columns: 'fold', 'train_start', 'train_end', 'last_window_start',
        'last_window_end', 'test_start', 'test_end', 'test_start_with_gap',
        'test_end_with_gap', 'fit_forecaster'.

        Following the python convention, the start index is inclusive and the end
        index is exclusive. This means that the last index is not included in the
        slice.

        This method updates the instance: a date-offset `window_size` is
        replaced by its length in positions, `initial_train_size` is replaced
        by the value returned by `date_to_index_position`, and `refit == 0`
        is normalised to `False`. When `initial_train_size` is None it is
        temporarily set to `window_size` and restored to None before
        returning, including when an exception is raised.
    """

    if not isinstance(X, (pd.Series, pd.DataFrame, pd.Index, dict)):
        raise TypeError(
            f"X must be a pandas Series, DataFrame, Index or a dictionary. "
            f"Got {type(X)}."
        )

    # Resolve the index once, up front, and use it everywhere below. `X.index`
    # does not exist on a pd.Index or a dict, so reading it directly broke the
    # date-offset branch for those (accepted) input kinds.
    # NOTE(review): assumes `_extract_index` returns a pandas Index -- confirm.
    index = self._extract_index(X)
    n_obs = len(index)

    window_size_as_date_offset = isinstance(
        self.window_size, pd.tseries.offsets.DateOffset
    )
    if window_size_as_date_offset:
        # Calculate the window_size in steps. This is not an exact calculation
        # because the offset follows the calendar rules and the distance between
        # two dates may not be constant.
        last_label = index[-1]
        first_valid_index = last_label - self.window_size
        try:
            window_size_idx_start = index.get_loc(first_valid_index)
            window_size_idx_end = index.get_loc(last_label)
        except KeyError as err:
            raise ValueError(
                f"The length of `y` ({n_obs}), must be greater than or equal "
                f"to the window size ({self.window_size}). This is because "
                f"the offset (forecaster.offset) is larger than the available "
                f"data. Try to decrease the size of the offset (forecaster.offset), "
                f"the number of `n_offsets` (forecaster.n_offsets) or increase the "
                f"size of `y`."
            ) from err
        self.window_size = window_size_idx_end - window_size_idx_start

    if self.initial_train_size is None:
        if self.window_size is None:
            raise ValueError(
                "To use split method when `initial_train_size` is None, "
                "`window_size` must be an integer greater than 0. "
                "Although no initial training is done and all data is used to "
                "evaluate the model, the first `window_size` observations are "
                "needed to create the initial predictors. Got `window_size` = None."
            )
        if self.refit:
            raise ValueError(
                "`refit` is only allowed when `initial_train_size` is not `None`. "
                "Set `refit` to `False` if you want to use `initial_train_size = None`."
            )
        externally_fitted = True
        # Temporary stand-in so the fold arithmetic below works; restored to
        # None in the `finally` clause.
        self.initial_train_size = self.window_size
    else:
        if self.window_size is None:
            warnings.warn(
                "Last window cannot be calculated because `window_size` is None.",
                IgnoredArgumentWarning,
            )
        externally_fitted = False

    try:
        self.initial_train_size = date_to_index_position(
            index=index,
            date_input=self.initial_train_size,
            method="validation",
            date_literal="initial_train_size",
        )

        if (
            window_size_as_date_offset
            and self.initial_train_size is not None
            and self.initial_train_size < self.window_size
        ):
            raise ValueError(
                f"If `initial_train_size` is an integer, it must be greater than "
                f"the `window_size` of the forecaster ({self.window_size}) "
                f"and smaller than the length of the series ({n_obs}). If "
                f"it is a date, it must be within this range of the index."
            )

        if self.allow_incomplete_fold:
            # At least one observation after the gap to allow incomplete fold
            if n_obs <= self.initial_train_size + self.gap:
                raise ValueError(
                    f"The time series must have more than `initial_train_size + gap` "
                    f"observations to create at least one fold.\n"
                    f" Time series length: {n_obs}\n"
                    f" Required > {self.initial_train_size + self.gap}\n"
                    f" initial_train_size: {self.initial_train_size}\n"
                    f" gap: {self.gap}\n"
                )
        else:
            # At least one complete fold
            if n_obs < self.initial_train_size + self.gap + self.steps:
                raise ValueError(
                    f"The time series must have at least `initial_train_size + gap + steps` "
                    f"observations to create a minimum of one complete fold "
                    f"(allow_incomplete_fold=False).\n"
                    f" Time series length: {n_obs}\n"
                    f" Required >= {self.initial_train_size + self.gap + self.steps}\n"
                    f" initial_train_size: {self.initial_train_size}\n"
                    f" gap: {self.gap}\n"
                    f" steps: {self.steps}\n"
                )

        # Slicing a range yields a range, so partitions stay cheap and clip
        # automatically at the end of the series.
        positions = range(n_obs)
        folds = []
        i = 0
        while self.initial_train_size + (i * self.fold_stride) + self.gap < n_obs:
            shift = i * self.fold_stride

            if self.refit:
                # NOTE: If `fixed_train_size` the train size doesn't increase but
                # moves by `fold_stride` positions in each iteration. If `False`,
                # the train size increases by `fold_stride` in each iteration.
                train_iloc_start = shift if self.fixed_train_size else 0
                train_iloc_end = self.initial_train_size + shift
                test_iloc_start = train_iloc_end
            else:
                # NOTE: The train size doesn't increase and doesn't move.
                train_iloc_start = 0
                train_iloc_end = self.initial_train_size
                test_iloc_start = self.initial_train_size + shift

            test_iloc_end = test_iloc_start + self.gap + self.steps

            if self.window_size is not None:
                last_window_iloc_start = test_iloc_start - self.window_size
                last_window = positions[last_window_iloc_start:test_iloc_start]
            else:
                last_window = []

            folds.append(
                [
                    positions[train_iloc_start:train_iloc_end],
                    last_window,
                    positions[test_iloc_start:test_iloc_end],
                    positions[test_iloc_start + self.gap : test_iloc_end],
                ]
            )
            i += 1

        # NOTE: Delete all incomplete folds at the end if not allowed
        n_removed_folds = 0
        if not self.allow_incomplete_fold:
            # Position 3 is the test set with the gap removed; its length is
            # the number of observations actually evaluated in the fold.
            while folds and len(folds[-1][3]) < self.steps:
                folds.pop()
                n_removed_folds += 1

        # Replace partitions inside folds with length 0 with `None`
        folds = [
            [partition if len(partition) > 0 else None for partition in fold]
            for fold in folds
        ]

        # Create a flag to know whether to train the forecaster
        if self.refit == 0:
            self.refit = False

        if isinstance(self.refit, bool):
            fit_forecaster = [self.refit] * len(folds)
            fit_forecaster[0] = True
        else:
            # Integer refit: train on the first fold and every `refit` folds after.
            fit_forecaster = [False] * len(folds)
            for i in range(0, len(fit_forecaster), self.refit):
                fit_forecaster[i] = True

        for fold_number, (fold, fit) in enumerate(zip(folds, fit_forecaster)):
            fold.insert(0, fold_number)
            fold.append(fit)
            if fit is False:
                # No training here: report the training range of the previous
                # fold, i.e. the one the forecaster was last fitted on.
                fold[1] = folds[fold_number - 1][1]

        index_to_skip = []
        if self.skip_folds is not None:
            if isinstance(self.skip_folds, (int, np.integer)) and self.skip_folds > 0:
                index_to_keep = np.arange(0, len(folds), self.skip_folds)
                index_to_skip = np.setdiff1d(
                    np.arange(0, len(folds)), index_to_keep, assume_unique=True
                )
                index_to_skip = [
                    int(x) for x in index_to_skip
                ]  # Required since numpy 2.0
            if isinstance(self.skip_folds, list):
                index_to_skip = [i for i in self.skip_folds if i < len(folds)]

        if self.verbose:
            self._print_info(
                index=index,
                folds=folds,
                externally_fitted=externally_fitted,
                n_removed_folds=n_removed_folds,
                index_to_skip=index_to_skip,
            )

        skipped = set(index_to_skip)
        folds = [fold for i, fold in enumerate(folds) if i not in skipped]
        if not self.return_all_indexes:
            # NOTE: +1 to prevent iloc pandas from deleting the last observation
            folds = [
                [
                    fold[0],
                    [fold[1][0], fold[1][-1] + 1],
                    (
                        [fold[2][0], fold[2][-1] + 1]
                        if self.window_size is not None
                        else []
                    ),
                    [fold[3][0], fold[3][-1] + 1],
                    [fold[4][0], fold[4][-1] + 1],
                    fold[5],
                ]
                for fold in folds
            ]
    finally:
        # Always undo the temporary stand-in, even when validation failed, so
        # that the instance can be reused with its original configuration.
        if externally_fitted:
            self.initial_train_size = None

    # With an externally fitted forecaster the first returned fold must not
    # trigger training. Guarded because every fold may have been skipped.
    if externally_fitted and folds:
        folds[0][5] = False

    if as_pandas:
        if self.window_size is None:
            for fold in folds:
                fold[2] = [None, None]

        if not self.return_all_indexes:
            folds = pd.DataFrame(
                data=[
                    [fold[0]]
                    + list(itertools.chain.from_iterable(fold[1:-1]))
                    + [fold[-1]]
                    for fold in folds
                ],
                columns=[
                    "fold",
                    "train_start",
                    "train_end",
                    "last_window_start",
                    "last_window_end",
                    "test_start",
                    "test_end",
                    "test_start_with_gap",
                    "test_end_with_gap",
                    "fit_forecaster",
                ],
            )
        else:
            folds = pd.DataFrame(
                data=folds,
                columns=[
                    "fold",
                    "train_index",
                    "last_window_index",
                    "test_index",
                    "test_index_with_gap",
                    "fit_forecaster",
                ],
            )

    return folds
579
-
580
def _print_info(
    self,
    index: pd.Index,
    folds: list[list[int]],
    externally_fitted: bool,
    n_removed_folds: int,
    index_to_skip: list[int],
) -> None:
    """Print information about folds.

    A general summary is printed first (training size, number of folds,
    skipped folds, steps, stride, gap, removed or incomplete folds),
    followed by the training and validation ranges of every fold.

    Args:
        index: Index of the time series data.
        folds: A list of lists containing the indices (position) for each fold.
            Position 1 of each fold is read as the training positions (or
            None), position 4 as the validation positions and the last
            position as the fit flag.
        externally_fitted: Whether an already trained forecaster is to be used.
        n_removed_folds: Number of folds removed.
        index_to_skip: Positions of the folds that are skipped.
    """
    print("Information of folds")
    print("--------------------")

    if externally_fitted:
        print(
            f"An already trained forecaster is to be used. Window size: "
            f"{self.window_size}"
        )
    elif self.differentiation is None:
        print(
            f"Number of observations used for initial training: "
            f"{self.initial_train_size}"
        )
    else:
        print(
            f"Number of observations used for initial training: "
            f"{self.initial_train_size - self.differentiation}"
        )
        print(
            f" First {self.differentiation} observation/s in training sets "
            f"are used for differentiation"
        )

    n_backtesting = len(index) - self.initial_train_size
    print(f"Number of observations used for backtesting: {n_backtesting}")
    print(f" Number of folds: {len(folds)}")
    # The list of skipped folds is only shown when it is not empty.
    skipped_detail = index_to_skip if index_to_skip else ''
    print(f" Number skipped folds: {len(index_to_skip)} {skipped_detail}")
    print(f" Number of steps per fold: {self.steps}")
    if self.steps != self.fold_stride:
        print(
            f" Number of steps to the next fold (fold stride): {self.fold_stride}"
        )
    print(
        f" Number of steps to exclude between last observed data "
        f"(last window) and predictions (gap): {self.gap}"
    )
    if n_removed_folds > 0:
        print(
            f" The last {n_removed_folds} fold(s) have been excluded "
            f"because they were incomplete."
        )

    last_fold_size = len(folds[-1][4])
    if last_fold_size < self.steps:
        print(f" Last fold only includes {last_fold_size} observations.")

    print("")

    # Observations consumed by differencing are left out of the reported
    # training range and length.
    differentiation = 0 if self.differentiation is None else self.differentiation

    for i, fold in enumerate(folds):
        is_fold_skipped = i in index_to_skip
        # The first fold is always treated as a training fold here.
        has_training = True if i == 0 else fold[-1]

        train_positions = fold[1]
        if train_positions is None:
            training_start = None
            training_end = None
            training_length = 0
        else:
            training_start = index[train_positions[0] + differentiation]
            training_end = index[train_positions[-1]]
            training_length = len(train_positions) - differentiation

        validation_positions = fold[4]
        validation_start = index[validation_positions[0]]
        validation_end = index[validation_positions[-1]]
        validation_length = len(validation_positions)
        validation_line = (
            f" Validation: {validation_start} -- {validation_end} "
            f"(n={validation_length})"
        )

        print(f"Fold: {i}")
        if is_fold_skipped:
            print(" Fold skipped")
            continue

        if not externally_fitted and has_training:
            print(
                f" Training: {training_start} -- {training_end} "
                f"(n={training_length})"
            )
        else:
            print(" Training: No training in this fold")
        print(validation_line)

    print("")