spotforecast2 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spotforecast2/.DS_Store +0 -0
- spotforecast2/__init__.py +2 -0
- spotforecast2/data/__init__.py +0 -0
- spotforecast2/data/data.py +130 -0
- spotforecast2/data/fetch_data.py +209 -0
- spotforecast2/exceptions.py +681 -0
- spotforecast2/forecaster/.DS_Store +0 -0
- spotforecast2/forecaster/__init__.py +7 -0
- spotforecast2/forecaster/base.py +448 -0
- spotforecast2/forecaster/metrics.py +527 -0
- spotforecast2/forecaster/recursive/__init__.py +4 -0
- spotforecast2/forecaster/recursive/_forecaster_equivalent_date.py +1075 -0
- spotforecast2/forecaster/recursive/_forecaster_recursive.py +939 -0
- spotforecast2/forecaster/recursive/_warnings.py +15 -0
- spotforecast2/forecaster/utils.py +954 -0
- spotforecast2/model_selection/__init__.py +5 -0
- spotforecast2/model_selection/bayesian_search.py +453 -0
- spotforecast2/model_selection/grid_search.py +314 -0
- spotforecast2/model_selection/random_search.py +151 -0
- spotforecast2/model_selection/split_base.py +357 -0
- spotforecast2/model_selection/split_one_step.py +245 -0
- spotforecast2/model_selection/split_ts_cv.py +634 -0
- spotforecast2/model_selection/utils_common.py +718 -0
- spotforecast2/model_selection/utils_metrics.py +103 -0
- spotforecast2/model_selection/validation.py +685 -0
- spotforecast2/preprocessing/__init__.py +30 -0
- spotforecast2/preprocessing/_binner.py +378 -0
- spotforecast2/preprocessing/_common.py +123 -0
- spotforecast2/preprocessing/_differentiator.py +123 -0
- spotforecast2/preprocessing/_rolling.py +136 -0
- spotforecast2/preprocessing/curate_data.py +254 -0
- spotforecast2/preprocessing/imputation.py +92 -0
- spotforecast2/preprocessing/outlier.py +114 -0
- spotforecast2/preprocessing/split.py +139 -0
- spotforecast2/py.typed +0 -0
- spotforecast2/utils/__init__.py +43 -0
- spotforecast2/utils/convert_to_utc.py +44 -0
- spotforecast2/utils/data_transform.py +208 -0
- spotforecast2/utils/forecaster_config.py +344 -0
- spotforecast2/utils/generate_holiday.py +70 -0
- spotforecast2/utils/validation.py +569 -0
- spotforecast2/weather/__init__.py +0 -0
- spotforecast2/weather/weather_client.py +288 -0
- spotforecast2-0.0.1.dist-info/METADATA +47 -0
- spotforecast2-0.0.1.dist-info/RECORD +46 -0
- spotforecast2-0.0.1.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,954 @@
|
|
|
1
|
+
from typing import Any, List, Optional, Tuple, Union
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import numpy as np
|
|
4
|
+
import warnings
|
|
5
|
+
import uuid
|
|
6
|
+
from sklearn.compose import ColumnTransformer
|
|
7
|
+
from spotforecast2.utils import (
|
|
8
|
+
initialize_lags,
|
|
9
|
+
initialize_weights,
|
|
10
|
+
check_select_fit_kwargs,
|
|
11
|
+
check_y,
|
|
12
|
+
check_exog,
|
|
13
|
+
get_exog_dtypes,
|
|
14
|
+
check_exog_dtypes,
|
|
15
|
+
check_predict_input,
|
|
16
|
+
check_interval,
|
|
17
|
+
input_to_frame,
|
|
18
|
+
expand_index,
|
|
19
|
+
transform_dataframe,
|
|
20
|
+
)
|
|
21
|
+
from spotforecast2.exceptions import set_skforecast_warnings, UnknownLevelWarning
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def check_preprocess_series(series):
    """Placeholder for series preprocessing checks in multiseries forecasters.

    Currently a no-op stub: it accepts any input, performs no validation and
    returns None.
    """
    # NOTE(review): placeholder implementation — confirm whether the real
    # checks live elsewhere before relying on this function.
    pass
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def check_preprocess_exog_multiseries(exog):
    """Placeholder for exogenous-variable preprocessing checks in multiseries
    forecasters.

    Currently a no-op stub: it accepts any input, performs no validation and
    returns None.
    """
    # NOTE(review): placeholder implementation — confirm whether the real
    # checks live elsewhere before relying on this function.
    pass
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def exog_to_direct(
|
|
33
|
+
exog: pd.Series | pd.DataFrame, steps: int
|
|
34
|
+
) -> tuple[pd.DataFrame, list[str]]:
|
|
35
|
+
"""
|
|
36
|
+
Transforms `exog` to a pandas DataFrame with the shape needed for Direct
|
|
37
|
+
forecasting.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
exog : pandas Series, pandas DataFrame
|
|
41
|
+
Exogenous variables.
|
|
42
|
+
steps : int
|
|
43
|
+
Number of steps that will be predicted using exog.
|
|
44
|
+
|
|
45
|
+
Returns:
|
|
46
|
+
tuple[pd.DataFrame, list[str]]:
|
|
47
|
+
exog_direct : pandas DataFrame
|
|
48
|
+
Exogenous variables transformed.
|
|
49
|
+
exog_direct_names : list
|
|
50
|
+
Names of the columns of the exogenous variables transformed. Only
|
|
51
|
+
created if `exog` is a pandas Series or DataFrame.
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
if not isinstance(exog, (pd.Series, pd.DataFrame)):
|
|
55
|
+
raise TypeError(
|
|
56
|
+
f"`exog` must be a pandas Series or DataFrame. Got {type(exog)}."
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
if isinstance(exog, pd.Series):
|
|
60
|
+
exog = exog.to_frame()
|
|
61
|
+
|
|
62
|
+
n_rows = len(exog)
|
|
63
|
+
exog_idx = exog.index
|
|
64
|
+
exog_cols = exog.columns
|
|
65
|
+
exog_direct = []
|
|
66
|
+
for i in range(steps):
|
|
67
|
+
exog_step = exog.iloc[i : n_rows - (steps - 1 - i),]
|
|
68
|
+
exog_step.index = pd.RangeIndex(len(exog_step))
|
|
69
|
+
exog_step.columns = [f"{col}_step_{i + 1}" for col in exog_cols]
|
|
70
|
+
exog_direct.append(exog_step)
|
|
71
|
+
|
|
72
|
+
exog_direct = pd.concat(exog_direct, axis=1) if steps > 1 else exog_direct[0]
|
|
73
|
+
|
|
74
|
+
exog_direct_names = exog_direct.columns.to_list()
|
|
75
|
+
exog_direct.index = exog_idx[-len(exog_direct) :]
|
|
76
|
+
|
|
77
|
+
return exog_direct, exog_direct_names
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def exog_to_direct_numpy(
|
|
81
|
+
exog: np.ndarray | pd.Series | pd.DataFrame, steps: int
|
|
82
|
+
) -> tuple[np.ndarray, list[str] | None]:
|
|
83
|
+
"""
|
|
84
|
+
Transforms `exog` to numpy ndarray with the shape needed for Direct
|
|
85
|
+
forecasting.
|
|
86
|
+
|
|
87
|
+
Args:
|
|
88
|
+
exog : numpy ndarray, pandas Series, pandas DataFrame
|
|
89
|
+
Exogenous variables, shape(samples,). If exog is a pandas format, the
|
|
90
|
+
direct exog names are created.
|
|
91
|
+
steps : int
|
|
92
|
+
Number of steps that will be predicted using exog.
|
|
93
|
+
|
|
94
|
+
Returns:
|
|
95
|
+
tuple[np.ndarray, list[str] | None]:
|
|
96
|
+
exog_direct : numpy ndarray
|
|
97
|
+
Exogenous variables transformed.
|
|
98
|
+
exog_direct_names : list, None
|
|
99
|
+
Names of the columns of the exogenous variables transformed. Only
|
|
100
|
+
created if `exog` is a pandas Series or DataFrame.
|
|
101
|
+
"""
|
|
102
|
+
|
|
103
|
+
if isinstance(exog, (pd.Series, pd.DataFrame)):
|
|
104
|
+
exog_cols = exog.columns if isinstance(exog, pd.DataFrame) else [exog.name]
|
|
105
|
+
exog_direct_names = [
|
|
106
|
+
f"{col}_step_{i + 1}" for i in range(steps) for col in exog_cols
|
|
107
|
+
]
|
|
108
|
+
exog = exog.to_numpy()
|
|
109
|
+
else:
|
|
110
|
+
exog_direct_names = None
|
|
111
|
+
if not isinstance(exog, np.ndarray):
|
|
112
|
+
raise TypeError(
|
|
113
|
+
f"`exog` must be a numpy ndarray, pandas Series or DataFrame. "
|
|
114
|
+
f"Got {type(exog)}."
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
if exog.ndim == 1:
|
|
118
|
+
exog = np.expand_dims(exog, axis=1)
|
|
119
|
+
|
|
120
|
+
n_rows = len(exog)
|
|
121
|
+
exog_direct = [exog[i : n_rows - (steps - 1 - i)] for i in range(steps)]
|
|
122
|
+
exog_direct = np.concatenate(exog_direct, axis=1) if steps > 1 else exog_direct[0]
|
|
123
|
+
|
|
124
|
+
return exog_direct, exog_direct_names
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def prepare_steps_direct(
|
|
128
|
+
max_step: int | list[int] | np.ndarray, steps: int | list[int] | None = None
|
|
129
|
+
) -> list[int]:
|
|
130
|
+
"""
|
|
131
|
+
Prepare list of steps to be predicted in Direct Forecasters.
|
|
132
|
+
|
|
133
|
+
Args:
|
|
134
|
+
max_step : int, list, numpy ndarray
|
|
135
|
+
Maximum number of future steps the forecaster will predict
|
|
136
|
+
when using predict methods.
|
|
137
|
+
steps : int, list, None, default None
|
|
138
|
+
Predict n steps. The value of `steps` must be less than or equal to the
|
|
139
|
+
value of steps defined when initializing the forecaster. Starts at 1.
|
|
140
|
+
|
|
141
|
+
- If `int`: Only steps within the range of 1 to int are predicted.
|
|
142
|
+
- If `list`: List of ints. Only the steps contained in the list
|
|
143
|
+
are predicted.
|
|
144
|
+
- If `None`: As many steps are predicted as were defined at
|
|
145
|
+
initialization.
|
|
146
|
+
|
|
147
|
+
Returns:
|
|
148
|
+
list[int]:
|
|
149
|
+
Steps to be predicted.
|
|
150
|
+
"""
|
|
151
|
+
|
|
152
|
+
if isinstance(steps, int):
|
|
153
|
+
steps_direct = list(range(1, steps + 1))
|
|
154
|
+
elif steps is None:
|
|
155
|
+
if isinstance(max_step, int):
|
|
156
|
+
steps_direct = list(range(1, max_step + 1))
|
|
157
|
+
else:
|
|
158
|
+
steps_direct = [int(s) for s in max_step]
|
|
159
|
+
elif isinstance(steps, list):
|
|
160
|
+
steps_direct = []
|
|
161
|
+
for step in steps:
|
|
162
|
+
if not isinstance(step, (int, np.integer)):
|
|
163
|
+
raise TypeError(
|
|
164
|
+
f"`steps` argument must be an int, a list of ints or `None`. "
|
|
165
|
+
f"Got {type(steps)}."
|
|
166
|
+
)
|
|
167
|
+
steps_direct.append(int(step))
|
|
168
|
+
|
|
169
|
+
return steps_direct
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def transform_numpy(
|
|
173
|
+
array: np.ndarray,
|
|
174
|
+
transformer: object | None,
|
|
175
|
+
fit: bool = False,
|
|
176
|
+
inverse_transform: bool = False,
|
|
177
|
+
) -> np.ndarray:
|
|
178
|
+
"""
|
|
179
|
+
Transform raw values of a numpy ndarray with a scikit-learn alike
|
|
180
|
+
transformer, preprocessor or ColumnTransformer. The transformer used must
|
|
181
|
+
have the following methods: fit, transform, fit_transform and
|
|
182
|
+
inverse_transform. ColumnTransformers are not allowed since they do not
|
|
183
|
+
have inverse_transform method.
|
|
184
|
+
|
|
185
|
+
Args:
|
|
186
|
+
array : numpy ndarray
|
|
187
|
+
Array to be transformed.
|
|
188
|
+
transformer : scikit-learn alike transformer, preprocessor, or ColumnTransformer.
|
|
189
|
+
Scikit-learn alike transformer (preprocessor) with methods: fit, transform,
|
|
190
|
+
fit_transform and inverse_transform.
|
|
191
|
+
fit : bool, default False
|
|
192
|
+
Train the transformer before applying it.
|
|
193
|
+
inverse_transform : bool, default False
|
|
194
|
+
Transform back the data to the original representation. This is not available
|
|
195
|
+
when using transformers of class scikit-learn ColumnTransformers.
|
|
196
|
+
|
|
197
|
+
Returns
|
|
198
|
+
-------
|
|
199
|
+
array_transformed : numpy ndarray
|
|
200
|
+
Transformed array.
|
|
201
|
+
|
|
202
|
+
"""
|
|
203
|
+
|
|
204
|
+
if transformer is None:
|
|
205
|
+
return array
|
|
206
|
+
|
|
207
|
+
if not isinstance(array, np.ndarray):
|
|
208
|
+
raise TypeError(f"`array` argument must be a numpy ndarray. Got {type(array)}")
|
|
209
|
+
|
|
210
|
+
original_ndim = array.ndim
|
|
211
|
+
original_shape = array.shape
|
|
212
|
+
reshaped_for_inverse = False
|
|
213
|
+
|
|
214
|
+
if original_ndim == 1:
|
|
215
|
+
array = array.reshape(-1, 1)
|
|
216
|
+
|
|
217
|
+
if inverse_transform and isinstance(transformer, ColumnTransformer):
|
|
218
|
+
raise ValueError(
|
|
219
|
+
"`inverse_transform` is not available when using ColumnTransformers."
|
|
220
|
+
)
|
|
221
|
+
|
|
222
|
+
with warnings.catch_warnings():
|
|
223
|
+
warnings.filterwarnings(
|
|
224
|
+
"ignore",
|
|
225
|
+
message="X does not have valid feature names",
|
|
226
|
+
category=UserWarning,
|
|
227
|
+
)
|
|
228
|
+
if not inverse_transform:
|
|
229
|
+
if fit:
|
|
230
|
+
array_transformed = transformer.fit_transform(array)
|
|
231
|
+
else:
|
|
232
|
+
array_transformed = transformer.transform(array)
|
|
233
|
+
else:
|
|
234
|
+
# Vectorized inverse transformation for 2D arrays with multiple columns.
|
|
235
|
+
# Reshape to single column, transform, and reshape back.
|
|
236
|
+
# This is faster than applying the transformer column by column.
|
|
237
|
+
if array.shape[1] > 1:
|
|
238
|
+
array = array.reshape(-1, 1)
|
|
239
|
+
reshaped_for_inverse = True
|
|
240
|
+
array_transformed = transformer.inverse_transform(array)
|
|
241
|
+
|
|
242
|
+
if hasattr(array_transformed, "toarray"):
|
|
243
|
+
# If the returned values are in sparse matrix format, it is converted to dense
|
|
244
|
+
array_transformed = array_transformed.toarray()
|
|
245
|
+
|
|
246
|
+
if isinstance(array_transformed, (pd.Series, pd.DataFrame)):
|
|
247
|
+
array_transformed = array_transformed.to_numpy()
|
|
248
|
+
|
|
249
|
+
# Reshape back to original shape only if we reshaped for inverse_transform
|
|
250
|
+
if reshaped_for_inverse:
|
|
251
|
+
array_transformed = array_transformed.reshape(original_shape)
|
|
252
|
+
|
|
253
|
+
if original_ndim == 1:
|
|
254
|
+
array_transformed = array_transformed.ravel()
|
|
255
|
+
|
|
256
|
+
return array_transformed
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def select_n_jobs_fit_forecaster(forecaster_name, estimator):
    """
    Select the number of jobs to run in parallel.

    Note: `forecaster_name` and `estimator` are currently ignored; the
    function simply returns the number of available CPUs (at least 1).
    """
    import os

    n_cpus = os.cpu_count()
    return n_cpus if n_cpus is not None else 1
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# Public API of this module: names re-exported from `spotforecast2.utils` /
# `spotforecast2.exceptions` plus the functions defined in this file.
# NOTE(review): "predict_multivariate" is listed here but no definition is
# visible in this chunk of the module — confirm it exists elsewhere in the
# file before relying on `from ... import *`.
__all__ = [
    "initialize_lags",
    "initialize_weights",
    "check_select_fit_kwargs",
    "check_y",
    "check_exog",
    "get_exog_dtypes",
    "check_exog_dtypes",
    "check_predict_input",
    "check_interval",
    "input_to_frame",
    "expand_index",
    "transform_dataframe",
    "check_preprocess_series",
    "check_preprocess_exog_multiseries",
    "set_skforecast_warnings",
    "initialize_window_features",
    "initialize_transformer_series",
    "check_extract_values_and_index",
    "get_style_repr_html",
    "initialize_estimator",
    "check_residuals_input",
    "date_to_index_position",
    "prepare_steps_direct",
    "exog_to_direct",
    "exog_to_direct_numpy",
    "transform_numpy",
    "select_n_jobs_fit_forecaster",
    "predict_multivariate",
]
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def initialize_window_features(
    window_features: Any,
) -> Tuple[Optional[List[object]], Optional[List[str]], Optional[int]]:
    """Validate `window_features` and collect their metadata.

    Each window feature object must expose the attributes `window_sizes` and
    `features_names`, and the methods `transform_batch` and `transform`.

    Args:
        window_features: A single window feature object or a list of them,
            or None.

    Returns:
        tuple: A tuple containing:
            - window_features (list or None): The validated objects as a list.
            - window_features_names (list or None): All feature names, in order.
            - max_size_window_features (int or None): Largest window size
              across all objects.

    Raises:
        ValueError: If `window_features` is an empty list, if an object lacks
            the required attributes/methods, if a window size is < 1, or if
            feature names are not unique.
        TypeError: If `window_sizes` or `features_names` have invalid types.
    """

    if window_features is None:
        return None, None, None

    if isinstance(window_features, list) and len(window_features) < 1:
        raise ValueError(
            "Argument `window_features` must contain at least one element."
        )
    if not isinstance(window_features, list):
        window_features = [window_features]

    needed_atts = ["window_sizes", "features_names"]
    needed_methods = ["transform_batch", "transform"]
    link_to_docs = (
        "\nVisit the documentation for more information about how to create "
        "custom window features:\n"
        "https://skforecast.org/latest/user_guides/window-features-and-custom-features.html#create-your-custom-window-features"
    )

    sizes_per_feature: List[int] = []
    window_features_names: List[str] = []
    for wf in window_features:
        wf_name = type(wf).__name__
        available = set(dir(wf))

        if not set(needed_atts).issubset(available):
            raise ValueError(
                f"{wf_name} must have the attributes: {needed_atts}." + link_to_docs
            )
        if not set(needed_methods).issubset(available):
            raise ValueError(
                f"{wf_name} must have the methods: {needed_methods}." + link_to_docs
            )

        window_sizes = wf.window_sizes
        if isinstance(window_sizes, int):
            if window_sizes < 1:
                raise ValueError(
                    f"If argument `window_sizes` is an integer, it must be equal to or "
                    f"greater than 1. Got {window_sizes} from {wf_name}."
                    + link_to_docs
                )
            sizes_per_feature.append(window_sizes)
        elif isinstance(window_sizes, list):
            if any(not isinstance(ws, int) for ws in window_sizes) or any(
                ws < 1 for ws in window_sizes
            ):
                raise ValueError(
                    f"If argument `window_sizes` is a list, all elements must be integers "
                    f"equal to or greater than 1. Got {window_sizes} from {wf_name}."
                    + link_to_docs
                )
            sizes_per_feature.append(max(window_sizes))
        else:
            raise TypeError(
                f"Attribute `window_sizes` of {wf_name} must be an int or a list "
                f"of ints. Got {type(window_sizes)}." + link_to_docs
            )

        features_names = wf.features_names
        if isinstance(features_names, str):
            window_features_names.append(features_names)
        elif isinstance(features_names, list):
            if any(not isinstance(fn, str) for fn in features_names):
                raise TypeError(
                    f"If argument `features_names` is a list, all elements "
                    f"must be strings. Got {features_names} from {wf_name}."
                    + link_to_docs
                )
            window_features_names.extend(features_names)
        else:
            raise TypeError(
                f"Attribute `features_names` of {wf_name} must be a str or "
                f"a list of strings. Got {type(features_names)}." + link_to_docs
            )

    if len(window_features_names) != len(set(window_features_names)):
        raise ValueError(
            f"All window features names must be unique. Got {window_features_names}."
        )

    return window_features, window_features_names, max(sizes_per_feature)
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def check_extract_values_and_index(
    data: Union[pd.Series, pd.DataFrame],
    data_label: str = "`y`",
    ignore_freq: bool = False,
    return_values: bool = True,
) -> Tuple[Optional[np.ndarray], pd.Index]:
    """Validate `data` and return its values and index.

    Ensures the input is a pandas Series or DataFrame indexed by a
    DatetimeIndex or RangeIndex, then extracts the values and index for use
    in forecasting operations.

    Args:
        data: Input pandas Series or DataFrame.
        data_label: Label used in exception messages. Defaults to "`y`".
        ignore_freq: If True, the frequency of a DatetimeIndex is not checked.
            Defaults to False.
        return_values: If True, the data values are returned as a numpy array;
            otherwise None is returned in their place. Defaults to True.

    Returns:
        tuple: A tuple containing:
            - values (numpy.ndarray or None): Values of the data, or None when
              return_values is False.
            - index (pandas.Index): Index of the data.

    Raises:
        TypeError: If data is not a pandas Series or DataFrame, or its index
            is not a DatetimeIndex or RangeIndex.

    Warnings:
        UserWarning: If a DatetimeIndex has no frequency set.
    """

    if not isinstance(data, (pd.Series, pd.DataFrame)):
        raise TypeError(f"{data_label} must be a pandas Series or DataFrame.")

    index = data.index
    if not isinstance(index, (pd.DatetimeIndex, pd.RangeIndex)):
        raise TypeError(f"{data_label} must have a pandas DatetimeIndex or RangeIndex.")

    freq_missing = (
        not ignore_freq
        and isinstance(index, pd.DatetimeIndex)
        and index.freq is None
    )
    if freq_missing:
        warnings.warn(
            f"{data_label} has a DatetimeIndex but no frequency. "
            "The frequency has been inferred from the index.",
            UserWarning,
        )

    values = data.to_numpy() if return_values else None

    return values, index
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def get_style_repr_html(is_fitted: bool = False) -> Tuple[str, str]:
    """Return the CSS block and unique container id for the HTML repr.

    A fresh 8-character id is generated on every call so that multiple
    forecaster representations on the same page get independent styling.

    Args:
        is_fitted: Whether the forecaster has been fitted. Currently unused;
            reserved for future styling variations.

    Returns:
        tuple: A tuple containing:
            - style (str): ``<style>`` block scoped to ``.container-<id>``.
            - unique_id (str): Unique 8-character id for the container element.
    """

    container_id = uuid.uuid4().hex[:8]
    style = f"""
    <style>
        .container-{container_id} {{
            font-family: monospace;
            background-color: #f0f0f0;
            padding: 10px;
            border-radius: 5px;
        }}
    </style>
    """
    return style, container_id
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
def check_residuals_input(
|
|
545
|
+
forecaster_name: str,
|
|
546
|
+
use_in_sample_residuals: bool,
|
|
547
|
+
in_sample_residuals_: np.ndarray | dict[str, np.ndarray] | None,
|
|
548
|
+
out_sample_residuals_: np.ndarray | dict[str, np.ndarray] | None,
|
|
549
|
+
use_binned_residuals: bool,
|
|
550
|
+
in_sample_residuals_by_bin_: (
|
|
551
|
+
dict[str | int, np.ndarray | dict[int, np.ndarray]] | None
|
|
552
|
+
),
|
|
553
|
+
out_sample_residuals_by_bin_: (
|
|
554
|
+
dict[str | int, np.ndarray | dict[int, np.ndarray]] | None
|
|
555
|
+
),
|
|
556
|
+
levels: list[str] | None = None,
|
|
557
|
+
encoding: str | None = None,
|
|
558
|
+
) -> None:
|
|
559
|
+
"""
|
|
560
|
+
Check residuals input arguments in Forecasters.
|
|
561
|
+
|
|
562
|
+
Args:
|
|
563
|
+
forecaster_name : str
|
|
564
|
+
Forecaster name.
|
|
565
|
+
use_in_sample_residuals : bool
|
|
566
|
+
Indicates if in sample or out sample residuals are used.
|
|
567
|
+
in_sample_residuals_ : numpy ndarray, dict
|
|
568
|
+
Residuals of the model when predicting training data.
|
|
569
|
+
out_sample_residuals_ : numpy ndarray, dict
|
|
570
|
+
Residuals of the model when predicting non training data.
|
|
571
|
+
use_binned_residuals : bool
|
|
572
|
+
Indicates if residuals are binned.
|
|
573
|
+
in_sample_residuals_by_bin_ : dict
|
|
574
|
+
In sample residuals binned according to the predicted value each residual
|
|
575
|
+
is associated with.
|
|
576
|
+
out_sample_residuals_by_bin_ : dict
|
|
577
|
+
Out of sample residuals binned according to the predicted value each residual
|
|
578
|
+
is associated with.
|
|
579
|
+
levels : list, default None
|
|
580
|
+
Names of the series (levels) to be predicted (Forecasters multiseries).
|
|
581
|
+
encoding : str, default None
|
|
582
|
+
Encoding used to identify the different series (ForecasterRecursiveMultiSeries).
|
|
583
|
+
|
|
584
|
+
Returns:
|
|
585
|
+
None
|
|
586
|
+
|
|
587
|
+
"""
|
|
588
|
+
|
|
589
|
+
forecasters_multiseries = (
|
|
590
|
+
"ForecasterRecursiveMultiSeries",
|
|
591
|
+
"ForecasterDirectMultiVariate",
|
|
592
|
+
"ForecasterRnn",
|
|
593
|
+
)
|
|
594
|
+
|
|
595
|
+
if use_in_sample_residuals:
|
|
596
|
+
if use_binned_residuals:
|
|
597
|
+
residuals = in_sample_residuals_by_bin_
|
|
598
|
+
literal = "in_sample_residuals_by_bin_"
|
|
599
|
+
else:
|
|
600
|
+
residuals = in_sample_residuals_
|
|
601
|
+
literal = "in_sample_residuals_"
|
|
602
|
+
|
|
603
|
+
# Check if residuals are empty or None
|
|
604
|
+
is_empty = (
|
|
605
|
+
residuals is None
|
|
606
|
+
or (isinstance(residuals, dict) and not residuals)
|
|
607
|
+
or (isinstance(residuals, np.ndarray) and residuals.size == 0)
|
|
608
|
+
)
|
|
609
|
+
if is_empty:
|
|
610
|
+
raise ValueError(
|
|
611
|
+
f"`forecaster.{literal}` is either None or empty. Use "
|
|
612
|
+
f"`store_in_sample_residuals = True` when fitting the forecaster "
|
|
613
|
+
f"or use the `set_in_sample_residuals()` method before predicting."
|
|
614
|
+
)
|
|
615
|
+
|
|
616
|
+
if forecaster_name in forecasters_multiseries:
|
|
617
|
+
if encoding is not None:
|
|
618
|
+
unknown_levels = set(levels) - set(residuals.keys())
|
|
619
|
+
if unknown_levels:
|
|
620
|
+
warnings.warn(
|
|
621
|
+
f"`levels` {unknown_levels} are not present in `forecaster.{literal}`, "
|
|
622
|
+
f"most likely because they were not present in the training data. "
|
|
623
|
+
f"A random sample of the residuals from other levels will be used. "
|
|
624
|
+
f"This can lead to inaccurate intervals for the unknown levels.",
|
|
625
|
+
UnknownLevelWarning,
|
|
626
|
+
)
|
|
627
|
+
else:
|
|
628
|
+
if use_binned_residuals:
|
|
629
|
+
residuals = out_sample_residuals_by_bin_
|
|
630
|
+
literal = "out_sample_residuals_by_bin_"
|
|
631
|
+
else:
|
|
632
|
+
residuals = out_sample_residuals_
|
|
633
|
+
literal = "out_sample_residuals_"
|
|
634
|
+
|
|
635
|
+
is_empty = (
|
|
636
|
+
residuals is None
|
|
637
|
+
or (isinstance(residuals, dict) and not residuals)
|
|
638
|
+
or (isinstance(residuals, np.ndarray) and residuals.size == 0)
|
|
639
|
+
)
|
|
640
|
+
if is_empty:
|
|
641
|
+
raise ValueError(
|
|
642
|
+
f"`forecaster.{literal}` is either None or empty. Use "
|
|
643
|
+
f"`set_out_sample_residuals()` method before predicting."
|
|
644
|
+
)
|
|
645
|
+
|
|
646
|
+
if forecaster_name in forecasters_multiseries:
|
|
647
|
+
if encoding is not None:
|
|
648
|
+
unknown_levels = set(levels) - set(residuals.keys())
|
|
649
|
+
if unknown_levels:
|
|
650
|
+
warnings.warn(
|
|
651
|
+
f"`levels` {unknown_levels} are not present in `forecaster.{literal}`, "
|
|
652
|
+
f"most likely because they were not present in the training data. "
|
|
653
|
+
f"A random sample of the residuals from other levels will be used. "
|
|
654
|
+
f"This can lead to inaccurate intervals for the unknown levels.",
|
|
655
|
+
UnknownLevelWarning,
|
|
656
|
+
)
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
def date_to_index_position(
|
|
660
|
+
index: pd.Index,
|
|
661
|
+
date_input: int | str | pd.Timestamp,
|
|
662
|
+
method: str = "prediction",
|
|
663
|
+
date_literal: str = "steps",
|
|
664
|
+
kwargs_pd_to_datetime: dict = {},
|
|
665
|
+
) -> int:
|
|
666
|
+
"""
|
|
667
|
+
Transform a datetime string or pandas Timestamp to an integer. The integer
|
|
668
|
+
represents the position of the datetime in the index.
|
|
669
|
+
|
|
670
|
+
Args:
|
|
671
|
+
index : pandas Index
|
|
672
|
+
Original datetime index (must be a pandas DatetimeIndex if `date_input`
|
|
673
|
+
is not an int).
|
|
674
|
+
date_input : int, str, pandas Timestamp
|
|
675
|
+
Datetime to transform to integer.
|
|
676
|
+
|
|
677
|
+
+ If int, returns the same integer.
|
|
678
|
+
+ If str or pandas Timestamp, it is converted and expanded into the index.
|
|
679
|
+
method : str, default 'prediction'
|
|
680
|
+
Can be 'prediction' or 'validation'.
|
|
681
|
+
|
|
682
|
+
+ If 'prediction', the date must be later than the last date in the index.
|
|
683
|
+
+ If 'validation', the date must be within the index range.
|
|
684
|
+
date_literal : str, default 'steps'
|
|
685
|
+
Variable name used in error messages.
|
|
686
|
+
kwargs_pd_to_datetime : dict, default {}
|
|
687
|
+
Additional keyword arguments to pass to `pd.to_datetime()`.
|
|
688
|
+
|
|
689
|
+
Returns:
|
|
690
|
+
int:
|
|
691
|
+
`date_input` transformed to integer position in the `index`.
|
|
692
|
+
|
|
693
|
+
+ If `date_input` is an integer, it returns the same integer.
|
|
694
|
+
+ If method is 'prediction', number of steps to predict from the last
|
|
695
|
+
date in the index.
|
|
696
|
+
+ If method is 'validation', position plus one of the date in the index,
|
|
697
|
+
this is done to include the target date in the training set when using
|
|
698
|
+
pandas iloc with slices.
|
|
699
|
+
|
|
700
|
+
"""
|
|
701
|
+
|
|
702
|
+
if method not in ["prediction", "validation"]:
|
|
703
|
+
raise ValueError("`method` must be 'prediction' or 'validation'.")
|
|
704
|
+
|
|
705
|
+
if isinstance(date_input, (str, pd.Timestamp)):
|
|
706
|
+
if not isinstance(index, pd.DatetimeIndex):
|
|
707
|
+
raise TypeError(
|
|
708
|
+
f"Index must be a pandas DatetimeIndex when `{date_literal}` is "
|
|
709
|
+
f"not an integer. Check input series or last window."
|
|
710
|
+
)
|
|
711
|
+
|
|
712
|
+
target_date = pd.to_datetime(date_input, **kwargs_pd_to_datetime)
|
|
713
|
+
last_date = pd.to_datetime(index[-1])
|
|
714
|
+
|
|
715
|
+
if method == "prediction":
|
|
716
|
+
if target_date <= last_date:
|
|
717
|
+
raise ValueError(
|
|
718
|
+
"If `steps` is a date, it must be greater than the last date "
|
|
719
|
+
"in the index."
|
|
720
|
+
)
|
|
721
|
+
span_index = pd.date_range(
|
|
722
|
+
start=last_date, end=target_date, freq=index.freq
|
|
723
|
+
)
|
|
724
|
+
output = len(span_index) - 1
|
|
725
|
+
elif method == "validation":
|
|
726
|
+
first_date = pd.to_datetime(index[0])
|
|
727
|
+
if target_date < first_date or target_date > last_date:
|
|
728
|
+
raise ValueError(
|
|
729
|
+
"If `initial_train_size` is a date, it must be greater than "
|
|
730
|
+
"the first date in the index and less than the last date."
|
|
731
|
+
)
|
|
732
|
+
span_index = pd.date_range(
|
|
733
|
+
start=first_date, end=target_date, freq=index.freq
|
|
734
|
+
)
|
|
735
|
+
output = len(span_index)
|
|
736
|
+
|
|
737
|
+
elif isinstance(date_input, (int, np.integer)):
|
|
738
|
+
output = date_input
|
|
739
|
+
|
|
740
|
+
else:
|
|
741
|
+
raise TypeError(
|
|
742
|
+
f"`{date_literal}` must be an integer, string, or pandas Timestamp."
|
|
743
|
+
)
|
|
744
|
+
|
|
745
|
+
return output
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
def initialize_estimator(
|
|
749
|
+
estimator: object | None = None, regressor: object | None = None
|
|
750
|
+
) -> None:
|
|
751
|
+
"""
|
|
752
|
+
Helper to handle the deprecation of 'regressor' in favor of 'estimator'.
|
|
753
|
+
Returns the valid estimator object.
|
|
754
|
+
|
|
755
|
+
Args:
|
|
756
|
+
estimator : estimator or pipeline compatible with the scikit-learn API, default None
|
|
757
|
+
An instance of a estimator or pipeline compatible with the scikit-learn API.
|
|
758
|
+
regressor : estimator or pipeline compatible with the scikit-learn API, default None
|
|
759
|
+
Deprecated. An instance of a estimator or pipeline compatible with the
|
|
760
|
+
scikit-learn API.
|
|
761
|
+
|
|
762
|
+
Returns:
|
|
763
|
+
estimator or pipeline compatible with the scikit-learn API
|
|
764
|
+
The valid estimator object.
|
|
765
|
+
|
|
766
|
+
"""
|
|
767
|
+
|
|
768
|
+
if regressor is not None:
|
|
769
|
+
warnings.warn(
|
|
770
|
+
"The `regressor` argument is deprecated and will be removed in a future "
|
|
771
|
+
"version. Please use `estimator` instead.",
|
|
772
|
+
FutureWarning,
|
|
773
|
+
stacklevel=3, # Important: to point to the user's code
|
|
774
|
+
)
|
|
775
|
+
if estimator is not None:
|
|
776
|
+
raise ValueError(
|
|
777
|
+
"Both `estimator` and `regressor` were provided. Use only `estimator`."
|
|
778
|
+
)
|
|
779
|
+
return regressor
|
|
780
|
+
|
|
781
|
+
return estimator
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
def predict_multivariate(
|
|
785
|
+
forecasters: dict[str, Any],
|
|
786
|
+
steps_ahead: int,
|
|
787
|
+
exog: pd.DataFrame | None = None,
|
|
788
|
+
) -> pd.DataFrame:
|
|
789
|
+
"""
|
|
790
|
+
Generate multi-output predictions using multiple baseline forecasters.
|
|
791
|
+
|
|
792
|
+
Args:
|
|
793
|
+
forecasters (dict): Dictionary of fitted forecaster instances (one per target).
|
|
794
|
+
Keys are target names, values are the fitted forecasters (e.g.,
|
|
795
|
+
ForecasterRecursive, ForecasterEquivalentDate).
|
|
796
|
+
steps_ahead (int): Number of steps to forecast.
|
|
797
|
+
exog (pd.DataFrame, optional): Exogenous variables for prediction.
|
|
798
|
+
If provided, will be passed to each forecaster's predict method.
|
|
799
|
+
|
|
800
|
+
Returns:
|
|
801
|
+
pd.DataFrame: DataFrame with predictions for all targets.
|
|
802
|
+
|
|
803
|
+
Examples:
|
|
804
|
+
>>> import pandas as pd
|
|
805
|
+
>>> from sklearn.linear_model import LinearRegression
|
|
806
|
+
>>> from spotforecast2.forecaster.recursive import ForecasterRecursive
|
|
807
|
+
>>> from spotforecast2.forecaster.utils import predict_multivariate
|
|
808
|
+
>>> y1 = pd.Series([1, 2, 3, 4, 5])
|
|
809
|
+
>>> y2 = pd.Series([2, 4, 6, 8, 10])
|
|
810
|
+
>>> f1 = ForecasterRecursive(estimator=LinearRegression(), lags=2)
|
|
811
|
+
>>> f2 = ForecasterRecursive(estimator=LinearRegression(), lags=2)
|
|
812
|
+
>>> f1.fit(y=y1)
|
|
813
|
+
>>> f2.fit(y=y2)
|
|
814
|
+
>>> forecasters = {'target1': f1, 'target2': f2}
|
|
815
|
+
>>> predictions = predict_multivariate(forecasters, steps_ahead=2)
|
|
816
|
+
>>> predictions
|
|
817
|
+
target1 target2
|
|
818
|
+
5 6.0 12.0
|
|
819
|
+
6 7.0 14.0
|
|
820
|
+
"""
|
|
821
|
+
|
|
822
|
+
if not forecasters:
|
|
823
|
+
return pd.DataFrame()
|
|
824
|
+
|
|
825
|
+
predictions = {}
|
|
826
|
+
|
|
827
|
+
for target, forecaster in forecasters.items():
|
|
828
|
+
# Generate predictions for this target
|
|
829
|
+
if exog is not None:
|
|
830
|
+
pred = forecaster.predict(steps=steps_ahead, exog=exog)
|
|
831
|
+
else:
|
|
832
|
+
pred = forecaster.predict(steps=steps_ahead)
|
|
833
|
+
predictions[target] = pred
|
|
834
|
+
|
|
835
|
+
# Combine into a single DataFrame
|
|
836
|
+
return pd.concat(predictions, axis=1)
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
def initialize_transformer_series(
    forecaster_name: str,
    series_names_in_: list[str],
    encoding: str | None = None,
    transformer_series: object | dict[str, object | None] | None = None,
) -> dict[str, object | None]:
    """Build the per-series transformer mapping for multivariate/multiseries forecasters.

    Three shapes of `transformer_series` are supported: ``None`` (no
    transformation for any series), a single transformer instance (cloned once
    per series so fits stay independent), or a dict keyed by series name
    (deep-copied entry by entry). For 'ForecasterRecursiveMultiSeries' an
    extra '_unknown_level' entry is added to cover series unseen at fit time.

    Args:
        forecaster_name: Name of the forecaster using this function. Special handling
            is applied for 'ForecasterRecursiveMultiSeries'.
        series_names_in_: Names of the time series (levels) used during training.
            These become the keys of the returned dictionary.
        encoding: Encoding used to identify different series. Only used for
            ForecasterRecursiveMultiSeries. If None, the mapping contains a
            single '_unknown_level' entry. Defaults to None.
        transformer_series: None, a single transformer, or a dict mapping series
            names to transformers. Defaults to None.

    Returns:
        dict: Series name -> transformer object (or None). Transformer objects
        are cloned/deep-copied so callers never share mutable state.

    Warnings:
        IgnoredArgumentWarning: If `transformer_series` is a dict and some
            series in `series_names_in_` are missing from its keys (those
            series get no transformation).

    Examples:
        >>> from spotforecast2.forecaster.utils import initialize_transformer_series
        >>> initialize_transformer_series(
        ...     forecaster_name='ForecasterDirectMultiVariate',
        ...     series_names_in_=['series1', 'series2'],
        ...     transformer_series=None
        ... )
        {'series1': None, 'series2': None}
    """
    # Local imports keep module import time low and avoid circular imports.
    from copy import deepcopy

    from sklearn.base import clone

    from spotforecast2.exceptions import IgnoredArgumentWarning

    if forecaster_name == "ForecasterRecursiveMultiSeries":
        # '_unknown_level' holds the fallback transformer for unseen series.
        if encoding is None:
            series_names_in_ = ["_unknown_level"]
        else:
            series_names_in_ = series_names_in_ + ["_unknown_level"]

    # Case 1: no transformation at all.
    if transformer_series is None:
        return {name: None for name in series_names_in_}

    # Case 2: one transformer shared by spec, cloned per series so that
    # fitting one series never overwrites another's fitted state.
    if not isinstance(transformer_series, dict):
        return {name: clone(transformer_series) for name in series_names_in_}

    # Case 3: per-series dict. Start with no-op entries, then copy in only
    # the transformers whose keys match known series names.
    transformer_series_: dict[str, object | None] = {
        name: None for name in series_names_in_
    }
    for key, value in transformer_series.items():
        if key in transformer_series_:
            transformer_series_[key] = deepcopy(value)

    series_not_in_transformer_series = (
        set(series_names_in_) - set(transformer_series.keys())
    ) - {"_unknown_level"}
    if series_not_in_transformer_series:
        warnings.warn(
            f"{series_not_in_transformer_series} not present in `transformer_series`."
            f" No transformation is applied to these series.",
            IgnoredArgumentWarning,
        )

    return transformer_series_
|