spotforecast2 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. spotforecast2/.DS_Store +0 -0
  2. spotforecast2/__init__.py +2 -0
  3. spotforecast2/data/__init__.py +0 -0
  4. spotforecast2/data/data.py +130 -0
  5. spotforecast2/data/fetch_data.py +209 -0
  6. spotforecast2/exceptions.py +681 -0
  7. spotforecast2/forecaster/.DS_Store +0 -0
  8. spotforecast2/forecaster/__init__.py +7 -0
  9. spotforecast2/forecaster/base.py +448 -0
  10. spotforecast2/forecaster/metrics.py +527 -0
  11. spotforecast2/forecaster/recursive/__init__.py +4 -0
  12. spotforecast2/forecaster/recursive/_forecaster_equivalent_date.py +1075 -0
  13. spotforecast2/forecaster/recursive/_forecaster_recursive.py +939 -0
  14. spotforecast2/forecaster/recursive/_warnings.py +15 -0
  15. spotforecast2/forecaster/utils.py +954 -0
  16. spotforecast2/model_selection/__init__.py +5 -0
  17. spotforecast2/model_selection/bayesian_search.py +453 -0
  18. spotforecast2/model_selection/grid_search.py +314 -0
  19. spotforecast2/model_selection/random_search.py +151 -0
  20. spotforecast2/model_selection/split_base.py +357 -0
  21. spotforecast2/model_selection/split_one_step.py +245 -0
  22. spotforecast2/model_selection/split_ts_cv.py +634 -0
  23. spotforecast2/model_selection/utils_common.py +718 -0
  24. spotforecast2/model_selection/utils_metrics.py +103 -0
  25. spotforecast2/model_selection/validation.py +685 -0
  26. spotforecast2/preprocessing/__init__.py +30 -0
  27. spotforecast2/preprocessing/_binner.py +378 -0
  28. spotforecast2/preprocessing/_common.py +123 -0
  29. spotforecast2/preprocessing/_differentiator.py +123 -0
  30. spotforecast2/preprocessing/_rolling.py +136 -0
  31. spotforecast2/preprocessing/curate_data.py +254 -0
  32. spotforecast2/preprocessing/imputation.py +92 -0
  33. spotforecast2/preprocessing/outlier.py +114 -0
  34. spotforecast2/preprocessing/split.py +139 -0
  35. spotforecast2/py.typed +0 -0
  36. spotforecast2/utils/__init__.py +43 -0
  37. spotforecast2/utils/convert_to_utc.py +44 -0
  38. spotforecast2/utils/data_transform.py +208 -0
  39. spotforecast2/utils/forecaster_config.py +344 -0
  40. spotforecast2/utils/generate_holiday.py +70 -0
  41. spotforecast2/utils/validation.py +569 -0
  42. spotforecast2/weather/__init__.py +0 -0
  43. spotforecast2/weather/weather_client.py +288 -0
  44. spotforecast2-0.0.1.dist-info/METADATA +47 -0
  45. spotforecast2-0.0.1.dist-info/RECORD +46 -0
  46. spotforecast2-0.0.1.dist-info/WHEEL +4 -0
@@ -0,0 +1,954 @@
1
+ from typing import Any, List, Optional, Tuple, Union
2
+ import pandas as pd
3
+ import numpy as np
4
+ import warnings
5
+ import uuid
6
+ from sklearn.compose import ColumnTransformer
7
+ from spotforecast2.utils import (
8
+ initialize_lags,
9
+ initialize_weights,
10
+ check_select_fit_kwargs,
11
+ check_y,
12
+ check_exog,
13
+ get_exog_dtypes,
14
+ check_exog_dtypes,
15
+ check_predict_input,
16
+ check_interval,
17
+ input_to_frame,
18
+ expand_index,
19
+ transform_dataframe,
20
+ )
21
+ from spotforecast2.exceptions import set_skforecast_warnings, UnknownLevelWarning
22
+
23
+
24
def check_preprocess_series(series):
    """Validate and preprocess a multiseries input.

    Placeholder: not yet implemented. Currently performs no validation and
    returns None regardless of input.

    Args:
        series: Collection of time series to validate (format to be defined
            when the function is implemented).
    """
    pass
26
+
27
+
28
def check_preprocess_exog_multiseries(exog):
    """Validate and preprocess exogenous variables for multiseries forecasters.

    Placeholder: not yet implemented. Currently performs no validation and
    returns None regardless of input.

    Args:
        exog: Exogenous variables to validate (format to be defined when the
            function is implemented).
    """
    pass
30
+
31
+
32
+ def exog_to_direct(
33
+ exog: pd.Series | pd.DataFrame, steps: int
34
+ ) -> tuple[pd.DataFrame, list[str]]:
35
+ """
36
+ Transforms `exog` to a pandas DataFrame with the shape needed for Direct
37
+ forecasting.
38
+
39
+ Args:
40
+ exog : pandas Series, pandas DataFrame
41
+ Exogenous variables.
42
+ steps : int
43
+ Number of steps that will be predicted using exog.
44
+
45
+ Returns:
46
+ tuple[pd.DataFrame, list[str]]:
47
+ exog_direct : pandas DataFrame
48
+ Exogenous variables transformed.
49
+ exog_direct_names : list
50
+ Names of the columns of the exogenous variables transformed. Only
51
+ created if `exog` is a pandas Series or DataFrame.
52
+ """
53
+
54
+ if not isinstance(exog, (pd.Series, pd.DataFrame)):
55
+ raise TypeError(
56
+ f"`exog` must be a pandas Series or DataFrame. Got {type(exog)}."
57
+ )
58
+
59
+ if isinstance(exog, pd.Series):
60
+ exog = exog.to_frame()
61
+
62
+ n_rows = len(exog)
63
+ exog_idx = exog.index
64
+ exog_cols = exog.columns
65
+ exog_direct = []
66
+ for i in range(steps):
67
+ exog_step = exog.iloc[i : n_rows - (steps - 1 - i),]
68
+ exog_step.index = pd.RangeIndex(len(exog_step))
69
+ exog_step.columns = [f"{col}_step_{i + 1}" for col in exog_cols]
70
+ exog_direct.append(exog_step)
71
+
72
+ exog_direct = pd.concat(exog_direct, axis=1) if steps > 1 else exog_direct[0]
73
+
74
+ exog_direct_names = exog_direct.columns.to_list()
75
+ exog_direct.index = exog_idx[-len(exog_direct) :]
76
+
77
+ return exog_direct, exog_direct_names
78
+
79
+
80
+ def exog_to_direct_numpy(
81
+ exog: np.ndarray | pd.Series | pd.DataFrame, steps: int
82
+ ) -> tuple[np.ndarray, list[str] | None]:
83
+ """
84
+ Transforms `exog` to numpy ndarray with the shape needed for Direct
85
+ forecasting.
86
+
87
+ Args:
88
+ exog : numpy ndarray, pandas Series, pandas DataFrame
89
+ Exogenous variables, shape(samples,). If exog is a pandas format, the
90
+ direct exog names are created.
91
+ steps : int
92
+ Number of steps that will be predicted using exog.
93
+
94
+ Returns:
95
+ tuple[np.ndarray, list[str] | None]:
96
+ exog_direct : numpy ndarray
97
+ Exogenous variables transformed.
98
+ exog_direct_names : list, None
99
+ Names of the columns of the exogenous variables transformed. Only
100
+ created if `exog` is a pandas Series or DataFrame.
101
+ """
102
+
103
+ if isinstance(exog, (pd.Series, pd.DataFrame)):
104
+ exog_cols = exog.columns if isinstance(exog, pd.DataFrame) else [exog.name]
105
+ exog_direct_names = [
106
+ f"{col}_step_{i + 1}" for i in range(steps) for col in exog_cols
107
+ ]
108
+ exog = exog.to_numpy()
109
+ else:
110
+ exog_direct_names = None
111
+ if not isinstance(exog, np.ndarray):
112
+ raise TypeError(
113
+ f"`exog` must be a numpy ndarray, pandas Series or DataFrame. "
114
+ f"Got {type(exog)}."
115
+ )
116
+
117
+ if exog.ndim == 1:
118
+ exog = np.expand_dims(exog, axis=1)
119
+
120
+ n_rows = len(exog)
121
+ exog_direct = [exog[i : n_rows - (steps - 1 - i)] for i in range(steps)]
122
+ exog_direct = np.concatenate(exog_direct, axis=1) if steps > 1 else exog_direct[0]
123
+
124
+ return exog_direct, exog_direct_names
125
+
126
+
127
+ def prepare_steps_direct(
128
+ max_step: int | list[int] | np.ndarray, steps: int | list[int] | None = None
129
+ ) -> list[int]:
130
+ """
131
+ Prepare list of steps to be predicted in Direct Forecasters.
132
+
133
+ Args:
134
+ max_step : int, list, numpy ndarray
135
+ Maximum number of future steps the forecaster will predict
136
+ when using predict methods.
137
+ steps : int, list, None, default None
138
+ Predict n steps. The value of `steps` must be less than or equal to the
139
+ value of steps defined when initializing the forecaster. Starts at 1.
140
+
141
+ - If `int`: Only steps within the range of 1 to int are predicted.
142
+ - If `list`: List of ints. Only the steps contained in the list
143
+ are predicted.
144
+ - If `None`: As many steps are predicted as were defined at
145
+ initialization.
146
+
147
+ Returns:
148
+ list[int]:
149
+ Steps to be predicted.
150
+ """
151
+
152
+ if isinstance(steps, int):
153
+ steps_direct = list(range(1, steps + 1))
154
+ elif steps is None:
155
+ if isinstance(max_step, int):
156
+ steps_direct = list(range(1, max_step + 1))
157
+ else:
158
+ steps_direct = [int(s) for s in max_step]
159
+ elif isinstance(steps, list):
160
+ steps_direct = []
161
+ for step in steps:
162
+ if not isinstance(step, (int, np.integer)):
163
+ raise TypeError(
164
+ f"`steps` argument must be an int, a list of ints or `None`. "
165
+ f"Got {type(steps)}."
166
+ )
167
+ steps_direct.append(int(step))
168
+
169
+ return steps_direct
170
+
171
+
172
+ def transform_numpy(
173
+ array: np.ndarray,
174
+ transformer: object | None,
175
+ fit: bool = False,
176
+ inverse_transform: bool = False,
177
+ ) -> np.ndarray:
178
+ """
179
+ Transform raw values of a numpy ndarray with a scikit-learn alike
180
+ transformer, preprocessor or ColumnTransformer. The transformer used must
181
+ have the following methods: fit, transform, fit_transform and
182
+ inverse_transform. ColumnTransformers are not allowed since they do not
183
+ have inverse_transform method.
184
+
185
+ Args:
186
+ array : numpy ndarray
187
+ Array to be transformed.
188
+ transformer : scikit-learn alike transformer, preprocessor, or ColumnTransformer.
189
+ Scikit-learn alike transformer (preprocessor) with methods: fit, transform,
190
+ fit_transform and inverse_transform.
191
+ fit : bool, default False
192
+ Train the transformer before applying it.
193
+ inverse_transform : bool, default False
194
+ Transform back the data to the original representation. This is not available
195
+ when using transformers of class scikit-learn ColumnTransformers.
196
+
197
+ Returns
198
+ -------
199
+ array_transformed : numpy ndarray
200
+ Transformed array.
201
+
202
+ """
203
+
204
+ if transformer is None:
205
+ return array
206
+
207
+ if not isinstance(array, np.ndarray):
208
+ raise TypeError(f"`array` argument must be a numpy ndarray. Got {type(array)}")
209
+
210
+ original_ndim = array.ndim
211
+ original_shape = array.shape
212
+ reshaped_for_inverse = False
213
+
214
+ if original_ndim == 1:
215
+ array = array.reshape(-1, 1)
216
+
217
+ if inverse_transform and isinstance(transformer, ColumnTransformer):
218
+ raise ValueError(
219
+ "`inverse_transform` is not available when using ColumnTransformers."
220
+ )
221
+
222
+ with warnings.catch_warnings():
223
+ warnings.filterwarnings(
224
+ "ignore",
225
+ message="X does not have valid feature names",
226
+ category=UserWarning,
227
+ )
228
+ if not inverse_transform:
229
+ if fit:
230
+ array_transformed = transformer.fit_transform(array)
231
+ else:
232
+ array_transformed = transformer.transform(array)
233
+ else:
234
+ # Vectorized inverse transformation for 2D arrays with multiple columns.
235
+ # Reshape to single column, transform, and reshape back.
236
+ # This is faster than applying the transformer column by column.
237
+ if array.shape[1] > 1:
238
+ array = array.reshape(-1, 1)
239
+ reshaped_for_inverse = True
240
+ array_transformed = transformer.inverse_transform(array)
241
+
242
+ if hasattr(array_transformed, "toarray"):
243
+ # If the returned values are in sparse matrix format, it is converted to dense
244
+ array_transformed = array_transformed.toarray()
245
+
246
+ if isinstance(array_transformed, (pd.Series, pd.DataFrame)):
247
+ array_transformed = array_transformed.to_numpy()
248
+
249
+ # Reshape back to original shape only if we reshaped for inverse_transform
250
+ if reshaped_for_inverse:
251
+ array_transformed = array_transformed.reshape(original_shape)
252
+
253
+ if original_ndim == 1:
254
+ array_transformed = array_transformed.ravel()
255
+
256
+ return array_transformed
257
+
258
+
259
def select_n_jobs_fit_forecaster(forecaster_name, estimator):
    """Return the number of parallel jobs to use when fitting a forecaster.

    Args:
        forecaster_name: Name of the forecaster (currently unused).
        estimator: Estimator instance (currently unused).

    Returns:
        int: Number of CPUs reported by the OS, or 1 if it cannot be
        determined.
    """
    import os

    cpu_total = os.cpu_count()
    return cpu_total if cpu_total else 1
266
+
267
+
268
# Public API of this module. Mixes names defined in this file with names
# re-exported from `spotforecast2.utils` and `spotforecast2.exceptions`
# (imported at the top of the file).
__all__ = [
    "initialize_lags",
    "initialize_weights",
    "check_select_fit_kwargs",
    "check_y",
    "check_exog",
    "get_exog_dtypes",
    "check_exog_dtypes",
    "check_predict_input",
    "check_interval",
    "input_to_frame",
    "expand_index",
    "transform_dataframe",
    "check_preprocess_series",
    "check_preprocess_exog_multiseries",
    "set_skforecast_warnings",
    "initialize_window_features",
    "initialize_transformer_series",
    "check_extract_values_and_index",
    "get_style_repr_html",
    "initialize_estimator",
    "check_residuals_input",
    "date_to_index_position",
    "prepare_steps_direct",
    "exog_to_direct",
    "exog_to_direct_numpy",
    "transform_numpy",
    "select_n_jobs_fit_forecaster",
    "predict_multivariate",
]
298
+
299
+
300
def initialize_window_features(
    window_features: Any,
) -> Tuple[Optional[List[object]], Optional[List[str]], Optional[int]]:
    """Check window_features argument input and generate the corresponding list.

    This function validates window feature objects and extracts their metadata,
    ensuring they have the required attributes (window_sizes, features_names) and
    methods (transform_batch, transform) for proper forecasting operations.

    Args:
        window_features: Classes used to create window features. Can be a single
            object or a list of objects. Each object must have `window_sizes`,
            `features_names` attributes and `transform_batch`, `transform` methods.

    Returns:
        tuple: A tuple containing:
            - window_features (list or None): List of classes used to create window features.
            - window_features_names (list or None): List with all the features names of the window features.
            - max_size_window_features (int or None): Maximum value of the `window_sizes` attribute of all classes.

    Raises:
        ValueError: If `window_features` is an empty list.
        ValueError: If a window feature is missing required attributes or methods.
        TypeError: If `window_sizes` or `features_names` have incorrect types.

    Examples:
        >>> from spotforecast2.forecaster.preprocessing import RollingFeatures
        >>> wf = RollingFeatures(stats=['mean', 'std'], window_sizes=[7, 14])
        >>> wf_list, names, max_size = initialize_window_features(wf)
        >>> print(f"Max window size: {max_size}")
        Max window size: 14
        >>> print(f"Number of features: {len(names)}")
        Number of features: 4

        Multiple window features:
        >>> wf1 = RollingFeatures(stats=['mean'], window_sizes=7)
        >>> wf2 = RollingFeatures(stats=['max', 'min'], window_sizes=3)
        >>> wf_list, names, max_size = initialize_window_features([wf1, wf2])
        >>> print(f"Max window size: {max_size}")
        Max window size: 7
    """

    # Duck-typed interface every window feature object must provide.
    needed_atts = ["window_sizes", "features_names"]
    needed_methods = ["transform_batch", "transform"]

    # Defaults returned when `window_features` is None.
    max_window_sizes = None
    window_features_names = None
    max_size_window_features = None
    if window_features is not None:
        if isinstance(window_features, list) and len(window_features) < 1:
            raise ValueError(
                "Argument `window_features` must contain at least one element."
            )
        # Normalize a single object to a one-element list.
        if not isinstance(window_features, list):
            window_features = [window_features]

        link_to_docs = (
            "\nVisit the documentation for more information about how to create "
            "custom window features:\n"
            "https://skforecast.org/latest/user_guides/window-features-and-custom-features.html#create-your-custom-window-features"
        )

        max_window_sizes = []
        window_features_names = []
        needed_atts_set = set(needed_atts)
        needed_methods_set = set(needed_methods)
        for wf in window_features:
            wf_name = type(wf).__name__
            # dir() covers both attributes and methods of the object.
            atts_methods = set(dir(wf))
            if not needed_atts_set.issubset(atts_methods):
                raise ValueError(
                    f"{wf_name} must have the attributes: {needed_atts}." + link_to_docs
                )
            if not needed_methods_set.issubset(atts_methods):
                raise ValueError(
                    f"{wf_name} must have the methods: {needed_methods}." + link_to_docs
                )

            # `window_sizes` may be a single int or a list of ints >= 1.
            window_sizes = wf.window_sizes
            if not isinstance(window_sizes, (int, list)):
                raise TypeError(
                    f"Attribute `window_sizes` of {wf_name} must be an int or a list "
                    f"of ints. Got {type(window_sizes)}." + link_to_docs
                )

            if isinstance(window_sizes, int):
                if window_sizes < 1:
                    raise ValueError(
                        f"If argument `window_sizes` is an integer, it must be equal to or "
                        f"greater than 1. Got {window_sizes} from {wf_name}."
                        + link_to_docs
                    )
                max_window_sizes.append(window_sizes)
            else:
                if not all(isinstance(ws, int) for ws in window_sizes) or not all(
                    ws >= 1 for ws in window_sizes
                ):
                    raise ValueError(
                        f"If argument `window_sizes` is a list, all elements must be integers "
                        f"equal to or greater than 1. Got {window_sizes} from {wf_name}."
                        + link_to_docs
                    )
                # Only the largest window of each object matters for the
                # overall required history length.
                max_window_sizes.append(max(window_sizes))

            # `features_names` may be a single str or a list of str.
            features_names = wf.features_names
            if not isinstance(features_names, (str, list)):
                raise TypeError(
                    f"Attribute `features_names` of {wf_name} must be a str or "
                    f"a list of strings. Got {type(features_names)}." + link_to_docs
                )
            if isinstance(features_names, str):
                window_features_names.append(features_names)
            else:
                if not all(isinstance(fn, str) for fn in features_names):
                    raise TypeError(
                        f"If argument `features_names` is a list, all elements "
                        f"must be strings. Got {features_names} from {wf_name}."
                        + link_to_docs
                    )
                window_features_names.extend(features_names)

        # Overall maximum window size across all window feature objects.
        max_size_window_features = max(max_window_sizes)
        # Duplicate feature names would produce colliding columns downstream.
        if len(set(window_features_names)) != len(window_features_names):
            raise ValueError(
                f"All window features names must be unique. Got {window_features_names}."
            )

    return window_features, window_features_names, max_size_window_features
428
+
429
+
430
def check_extract_values_and_index(
    data: Union[pd.Series, pd.DataFrame],
    data_label: str = "`y`",
    ignore_freq: bool = False,
    return_values: bool = True,
) -> Tuple[Optional[np.ndarray], pd.Index]:
    """Validate a pandas Series/DataFrame and return its values and index.

    Ensures the input is a pandas object with a DatetimeIndex or RangeIndex.
    When the index is a DatetimeIndex without an explicit frequency and
    `ignore_freq` is False, a UserWarning is emitted.

    Args:
        data: Input pandas Series or DataFrame.
        data_label: Label used in exception messages for better error
            reporting. Defaults to "`y`".
        ignore_freq: If True, skip the index frequency check. Defaults to False.
        return_values: If True, also return the data values as a numpy array.
            Defaults to True.

    Returns:
        tuple: A tuple containing:
            - values (numpy.ndarray or None): Values of the data, or None when
              `return_values` is False.
            - index (pandas.Index): Index of the data.

    Raises:
        TypeError: If `data` is not a pandas Series or DataFrame.
        TypeError: If the index is not a DatetimeIndex or RangeIndex.
    """

    if not isinstance(data, (pd.Series, pd.DataFrame)):
        raise TypeError(f"{data_label} must be a pandas Series or DataFrame.")

    index = data.index
    if not isinstance(index, (pd.DatetimeIndex, pd.RangeIndex)):
        raise TypeError(f"{data_label} must have a pandas DatetimeIndex or RangeIndex.")

    # Warn when a datetime index carries no explicit frequency.
    if isinstance(index, pd.DatetimeIndex) and not ignore_freq and index.freq is None:
        warnings.warn(
            f"{data_label} has a DatetimeIndex but no frequency. "
            "The frequency has been inferred from the index.",
            UserWarning,
        )

    if return_values:
        return data.to_numpy(), index
    return None, index
498
+
499
+
500
def get_style_repr_html(is_fitted: bool = False) -> Tuple[str, str]:
    """Build the CSS style block used by the forecaster HTML repr.

    Produces a `<style>` block scoped to a freshly generated container class so
    several forecaster representations can coexist on the same page without
    clashing.

    Args:
        is_fitted: Whether the forecaster has been fitted. Currently unused;
            reserved for future styling variations.

    Returns:
        tuple: A tuple containing:
            - style (str): CSS `<style>` block scoped to the unique container.
            - unique_id (str): Unique 8-character ID for the container element.
    """

    # 8 hex chars are enough to avoid collisions between repr renderings.
    unique_id = str(uuid.uuid4())[:8]
    style = f"""
    <style>
        .container-{unique_id} {{
            font-family: monospace;
            background-color: #f0f0f0;
            padding: 10px;
            border-radius: 5px;
        }}
    </style>
    """
    return style, unique_id
542
+
543
+
544
+ def check_residuals_input(
545
+ forecaster_name: str,
546
+ use_in_sample_residuals: bool,
547
+ in_sample_residuals_: np.ndarray | dict[str, np.ndarray] | None,
548
+ out_sample_residuals_: np.ndarray | dict[str, np.ndarray] | None,
549
+ use_binned_residuals: bool,
550
+ in_sample_residuals_by_bin_: (
551
+ dict[str | int, np.ndarray | dict[int, np.ndarray]] | None
552
+ ),
553
+ out_sample_residuals_by_bin_: (
554
+ dict[str | int, np.ndarray | dict[int, np.ndarray]] | None
555
+ ),
556
+ levels: list[str] | None = None,
557
+ encoding: str | None = None,
558
+ ) -> None:
559
+ """
560
+ Check residuals input arguments in Forecasters.
561
+
562
+ Args:
563
+ forecaster_name : str
564
+ Forecaster name.
565
+ use_in_sample_residuals : bool
566
+ Indicates if in sample or out sample residuals are used.
567
+ in_sample_residuals_ : numpy ndarray, dict
568
+ Residuals of the model when predicting training data.
569
+ out_sample_residuals_ : numpy ndarray, dict
570
+ Residuals of the model when predicting non training data.
571
+ use_binned_residuals : bool
572
+ Indicates if residuals are binned.
573
+ in_sample_residuals_by_bin_ : dict
574
+ In sample residuals binned according to the predicted value each residual
575
+ is associated with.
576
+ out_sample_residuals_by_bin_ : dict
577
+ Out of sample residuals binned according to the predicted value each residual
578
+ is associated with.
579
+ levels : list, default None
580
+ Names of the series (levels) to be predicted (Forecasters multiseries).
581
+ encoding : str, default None
582
+ Encoding used to identify the different series (ForecasterRecursiveMultiSeries).
583
+
584
+ Returns:
585
+ None
586
+
587
+ """
588
+
589
+ forecasters_multiseries = (
590
+ "ForecasterRecursiveMultiSeries",
591
+ "ForecasterDirectMultiVariate",
592
+ "ForecasterRnn",
593
+ )
594
+
595
+ if use_in_sample_residuals:
596
+ if use_binned_residuals:
597
+ residuals = in_sample_residuals_by_bin_
598
+ literal = "in_sample_residuals_by_bin_"
599
+ else:
600
+ residuals = in_sample_residuals_
601
+ literal = "in_sample_residuals_"
602
+
603
+ # Check if residuals are empty or None
604
+ is_empty = (
605
+ residuals is None
606
+ or (isinstance(residuals, dict) and not residuals)
607
+ or (isinstance(residuals, np.ndarray) and residuals.size == 0)
608
+ )
609
+ if is_empty:
610
+ raise ValueError(
611
+ f"`forecaster.{literal}` is either None or empty. Use "
612
+ f"`store_in_sample_residuals = True` when fitting the forecaster "
613
+ f"or use the `set_in_sample_residuals()` method before predicting."
614
+ )
615
+
616
+ if forecaster_name in forecasters_multiseries:
617
+ if encoding is not None:
618
+ unknown_levels = set(levels) - set(residuals.keys())
619
+ if unknown_levels:
620
+ warnings.warn(
621
+ f"`levels` {unknown_levels} are not present in `forecaster.{literal}`, "
622
+ f"most likely because they were not present in the training data. "
623
+ f"A random sample of the residuals from other levels will be used. "
624
+ f"This can lead to inaccurate intervals for the unknown levels.",
625
+ UnknownLevelWarning,
626
+ )
627
+ else:
628
+ if use_binned_residuals:
629
+ residuals = out_sample_residuals_by_bin_
630
+ literal = "out_sample_residuals_by_bin_"
631
+ else:
632
+ residuals = out_sample_residuals_
633
+ literal = "out_sample_residuals_"
634
+
635
+ is_empty = (
636
+ residuals is None
637
+ or (isinstance(residuals, dict) and not residuals)
638
+ or (isinstance(residuals, np.ndarray) and residuals.size == 0)
639
+ )
640
+ if is_empty:
641
+ raise ValueError(
642
+ f"`forecaster.{literal}` is either None or empty. Use "
643
+ f"`set_out_sample_residuals()` method before predicting."
644
+ )
645
+
646
+ if forecaster_name in forecasters_multiseries:
647
+ if encoding is not None:
648
+ unknown_levels = set(levels) - set(residuals.keys())
649
+ if unknown_levels:
650
+ warnings.warn(
651
+ f"`levels` {unknown_levels} are not present in `forecaster.{literal}`, "
652
+ f"most likely because they were not present in the training data. "
653
+ f"A random sample of the residuals from other levels will be used. "
654
+ f"This can lead to inaccurate intervals for the unknown levels.",
655
+ UnknownLevelWarning,
656
+ )
657
+
658
+
659
+ def date_to_index_position(
660
+ index: pd.Index,
661
+ date_input: int | str | pd.Timestamp,
662
+ method: str = "prediction",
663
+ date_literal: str = "steps",
664
+ kwargs_pd_to_datetime: dict = {},
665
+ ) -> int:
666
+ """
667
+ Transform a datetime string or pandas Timestamp to an integer. The integer
668
+ represents the position of the datetime in the index.
669
+
670
+ Args:
671
+ index : pandas Index
672
+ Original datetime index (must be a pandas DatetimeIndex if `date_input`
673
+ is not an int).
674
+ date_input : int, str, pandas Timestamp
675
+ Datetime to transform to integer.
676
+
677
+ + If int, returns the same integer.
678
+ + If str or pandas Timestamp, it is converted and expanded into the index.
679
+ method : str, default 'prediction'
680
+ Can be 'prediction' or 'validation'.
681
+
682
+ + If 'prediction', the date must be later than the last date in the index.
683
+ + If 'validation', the date must be within the index range.
684
+ date_literal : str, default 'steps'
685
+ Variable name used in error messages.
686
+ kwargs_pd_to_datetime : dict, default {}
687
+ Additional keyword arguments to pass to `pd.to_datetime()`.
688
+
689
+ Returns:
690
+ int:
691
+ `date_input` transformed to integer position in the `index`.
692
+
693
+ + If `date_input` is an integer, it returns the same integer.
694
+ + If method is 'prediction', number of steps to predict from the last
695
+ date in the index.
696
+ + If method is 'validation', position plus one of the date in the index,
697
+ this is done to include the target date in the training set when using
698
+ pandas iloc with slices.
699
+
700
+ """
701
+
702
+ if method not in ["prediction", "validation"]:
703
+ raise ValueError("`method` must be 'prediction' or 'validation'.")
704
+
705
+ if isinstance(date_input, (str, pd.Timestamp)):
706
+ if not isinstance(index, pd.DatetimeIndex):
707
+ raise TypeError(
708
+ f"Index must be a pandas DatetimeIndex when `{date_literal}` is "
709
+ f"not an integer. Check input series or last window."
710
+ )
711
+
712
+ target_date = pd.to_datetime(date_input, **kwargs_pd_to_datetime)
713
+ last_date = pd.to_datetime(index[-1])
714
+
715
+ if method == "prediction":
716
+ if target_date <= last_date:
717
+ raise ValueError(
718
+ "If `steps` is a date, it must be greater than the last date "
719
+ "in the index."
720
+ )
721
+ span_index = pd.date_range(
722
+ start=last_date, end=target_date, freq=index.freq
723
+ )
724
+ output = len(span_index) - 1
725
+ elif method == "validation":
726
+ first_date = pd.to_datetime(index[0])
727
+ if target_date < first_date or target_date > last_date:
728
+ raise ValueError(
729
+ "If `initial_train_size` is a date, it must be greater than "
730
+ "the first date in the index and less than the last date."
731
+ )
732
+ span_index = pd.date_range(
733
+ start=first_date, end=target_date, freq=index.freq
734
+ )
735
+ output = len(span_index)
736
+
737
+ elif isinstance(date_input, (int, np.integer)):
738
+ output = date_input
739
+
740
+ else:
741
+ raise TypeError(
742
+ f"`{date_literal}` must be an integer, string, or pandas Timestamp."
743
+ )
744
+
745
+ return output
746
+
747
+
748
+ def initialize_estimator(
749
+ estimator: object | None = None, regressor: object | None = None
750
+ ) -> None:
751
+ """
752
+ Helper to handle the deprecation of 'regressor' in favor of 'estimator'.
753
+ Returns the valid estimator object.
754
+
755
+ Args:
756
+ estimator : estimator or pipeline compatible with the scikit-learn API, default None
757
+ An instance of a estimator or pipeline compatible with the scikit-learn API.
758
+ regressor : estimator or pipeline compatible with the scikit-learn API, default None
759
+ Deprecated. An instance of a estimator or pipeline compatible with the
760
+ scikit-learn API.
761
+
762
+ Returns:
763
+ estimator or pipeline compatible with the scikit-learn API
764
+ The valid estimator object.
765
+
766
+ """
767
+
768
+ if regressor is not None:
769
+ warnings.warn(
770
+ "The `regressor` argument is deprecated and will be removed in a future "
771
+ "version. Please use `estimator` instead.",
772
+ FutureWarning,
773
+ stacklevel=3, # Important: to point to the user's code
774
+ )
775
+ if estimator is not None:
776
+ raise ValueError(
777
+ "Both `estimator` and `regressor` were provided. Use only `estimator`."
778
+ )
779
+ return regressor
780
+
781
+ return estimator
782
+
783
+
784
+ def predict_multivariate(
785
+ forecasters: dict[str, Any],
786
+ steps_ahead: int,
787
+ exog: pd.DataFrame | None = None,
788
+ ) -> pd.DataFrame:
789
+ """
790
+ Generate multi-output predictions using multiple baseline forecasters.
791
+
792
+ Args:
793
+ forecasters (dict): Dictionary of fitted forecaster instances (one per target).
794
+ Keys are target names, values are the fitted forecasters (e.g.,
795
+ ForecasterRecursive, ForecasterEquivalentDate).
796
+ steps_ahead (int): Number of steps to forecast.
797
+ exog (pd.DataFrame, optional): Exogenous variables for prediction.
798
+ If provided, will be passed to each forecaster's predict method.
799
+
800
+ Returns:
801
+ pd.DataFrame: DataFrame with predictions for all targets.
802
+
803
+ Examples:
804
+ >>> import pandas as pd
805
+ >>> from sklearn.linear_model import LinearRegression
806
+ >>> from spotforecast2.forecaster.recursive import ForecasterRecursive
807
+ >>> from spotforecast2.forecaster.utils import predict_multivariate
808
+ >>> y1 = pd.Series([1, 2, 3, 4, 5])
809
+ >>> y2 = pd.Series([2, 4, 6, 8, 10])
810
+ >>> f1 = ForecasterRecursive(estimator=LinearRegression(), lags=2)
811
+ >>> f2 = ForecasterRecursive(estimator=LinearRegression(), lags=2)
812
+ >>> f1.fit(y=y1)
813
+ >>> f2.fit(y=y2)
814
+ >>> forecasters = {'target1': f1, 'target2': f2}
815
+ >>> predictions = predict_multivariate(forecasters, steps_ahead=2)
816
+ >>> predictions
817
+ target1 target2
818
+ 5 6.0 12.0
819
+ 6 7.0 14.0
820
+ """
821
+
822
+ if not forecasters:
823
+ return pd.DataFrame()
824
+
825
+ predictions = {}
826
+
827
+ for target, forecaster in forecasters.items():
828
+ # Generate predictions for this target
829
+ if exog is not None:
830
+ pred = forecaster.predict(steps=steps_ahead, exog=exog)
831
+ else:
832
+ pred = forecaster.predict(steps=steps_ahead)
833
+ predictions[target] = pred
834
+
835
+ # Combine into a single DataFrame
836
+ return pd.concat(predictions, axis=1)
837
+
838
+
839
def initialize_transformer_series(
    forecaster_name: str,
    series_names_in_: list[str],
    encoding: str | None = None,
    transformer_series: object | dict[str, object | None] | None = None,
) -> dict[str, object | None]:
    """Build the per-series transformer mapping for multiseries forecasters.

    Resolves ``transformer_series`` into a dict keyed by series name. Three
    shapes are accepted: ``None`` (no transformation anywhere), a single
    transformer object (an independent clone is assigned to every series), or
    a dict mapping series names to transformers (each value is deep-copied;
    series absent from the dict get ``None``). Copies/clones are made so the
    user's objects are never mutated in place.

    Args:
        forecaster_name: Name of the calling forecaster. For
            'ForecasterRecursiveMultiSeries' an extra '_unknown_level' entry is
            managed (it replaces the series list entirely when ``encoding`` is
            None, otherwise it is appended).
        series_names_in_: Names of the series (levels) seen during training;
            these become the keys of the returned dict.
        encoding: Series-identification encoding; only relevant for
            ForecasterRecursiveMultiSeries. Defaults to None.
        transformer_series: None, a single transformer, or a dict of
            transformers per series. Defaults to None.

    Returns:
        dict: Series name -> transformer instance (or None).

    Warnings:
        IgnoredArgumentWarning: Raised when ``transformer_series`` is a dict
            that does not cover every series in ``series_names_in_`` (uncovered
            series receive no transformation).

    Examples:
        No transformation:
        >>> from spotforecast2.forecaster.utils import initialize_transformer_series
        >>> initialize_transformer_series(
        ...     forecaster_name='ForecasterDirectMultiVariate',
        ...     series_names_in_=['series1', 'series2', 'series3'],
        ...     transformer_series=None
        ... )
        {'series1': None, 'series2': None, 'series3': None}

        Same transformer for all series (independent clones):
        >>> from sklearn.preprocessing import StandardScaler
        >>> result = initialize_transformer_series(
        ...     forecaster_name='ForecasterDirectMultiVariate',
        ...     series_names_in_=['series1', 'series2'],
        ...     transformer_series=StandardScaler()
        ... )
        >>> result['series1'] is result['series2']
        False

        Different transformer per series:
        >>> from sklearn.preprocessing import MinMaxScaler
        >>> result = initialize_transformer_series(
        ...     forecaster_name='ForecasterDirectMultiVariate',
        ...     series_names_in_=['series1', 'series2'],
        ...     transformer_series={'series1': StandardScaler(),
        ...                         'series2': MinMaxScaler()}
        ... )
        >>> isinstance(result['series2'], MinMaxScaler)
        True
    """
    from copy import deepcopy
    from sklearn.base import clone
    from spotforecast2.exceptions import IgnoredArgumentWarning

    # The recursive multiseries forecaster tracks an extra pseudo-level for
    # series unseen at fit time.
    if forecaster_name == "ForecasterRecursiveMultiSeries":
        series_names_in_ = (
            ["_unknown_level"]
            if encoding is None
            else series_names_in_ + ["_unknown_level"]
        )

    if transformer_series is None:
        return {name: None for name in series_names_in_}

    if not isinstance(transformer_series, dict):
        # One shared transformer: clone per series so fits stay independent.
        return {name: clone(transformer_series) for name in series_names_in_}

    # Dict case: deep-copy what the user supplied, default the rest to None.
    transformer_series_ = {
        name: deepcopy(transformer_series[name])
        if name in transformer_series
        else None
        for name in series_names_in_
    }

    series_not_in_transformer_series = (
        set(series_names_in_) - set(transformer_series.keys())
    ) - {"_unknown_level"}
    if series_not_in_transformer_series:
        warnings.warn(
            f"{series_not_in_transformer_series} not present in `transformer_series`."
            f" No transformation is applied to these series.",
            IgnoredArgumentWarning,
        )

    return transformer_series_