spotforecast2 0.0.1__py3-none-any.whl
This diff shows the content of a publicly released package version as it appears in its public registry; it is provided for informational purposes only.
- spotforecast2/.DS_Store +0 -0
- spotforecast2/__init__.py +2 -0
- spotforecast2/data/__init__.py +0 -0
- spotforecast2/data/data.py +130 -0
- spotforecast2/data/fetch_data.py +209 -0
- spotforecast2/exceptions.py +681 -0
- spotforecast2/forecaster/.DS_Store +0 -0
- spotforecast2/forecaster/__init__.py +7 -0
- spotforecast2/forecaster/base.py +448 -0
- spotforecast2/forecaster/metrics.py +527 -0
- spotforecast2/forecaster/recursive/__init__.py +4 -0
- spotforecast2/forecaster/recursive/_forecaster_equivalent_date.py +1075 -0
- spotforecast2/forecaster/recursive/_forecaster_recursive.py +939 -0
- spotforecast2/forecaster/recursive/_warnings.py +15 -0
- spotforecast2/forecaster/utils.py +954 -0
- spotforecast2/model_selection/__init__.py +5 -0
- spotforecast2/model_selection/bayesian_search.py +453 -0
- spotforecast2/model_selection/grid_search.py +314 -0
- spotforecast2/model_selection/random_search.py +151 -0
- spotforecast2/model_selection/split_base.py +357 -0
- spotforecast2/model_selection/split_one_step.py +245 -0
- spotforecast2/model_selection/split_ts_cv.py +634 -0
- spotforecast2/model_selection/utils_common.py +718 -0
- spotforecast2/model_selection/utils_metrics.py +103 -0
- spotforecast2/model_selection/validation.py +685 -0
- spotforecast2/preprocessing/__init__.py +30 -0
- spotforecast2/preprocessing/_binner.py +378 -0
- spotforecast2/preprocessing/_common.py +123 -0
- spotforecast2/preprocessing/_differentiator.py +123 -0
- spotforecast2/preprocessing/_rolling.py +136 -0
- spotforecast2/preprocessing/curate_data.py +254 -0
- spotforecast2/preprocessing/imputation.py +92 -0
- spotforecast2/preprocessing/outlier.py +114 -0
- spotforecast2/preprocessing/split.py +139 -0
- spotforecast2/py.typed +0 -0
- spotforecast2/utils/__init__.py +43 -0
- spotforecast2/utils/convert_to_utc.py +44 -0
- spotforecast2/utils/data_transform.py +208 -0
- spotforecast2/utils/forecaster_config.py +344 -0
- spotforecast2/utils/generate_holiday.py +70 -0
- spotforecast2/utils/validation.py +569 -0
- spotforecast2/weather/__init__.py +0 -0
- spotforecast2/weather/weather_client.py +288 -0
- spotforecast2-0.0.1.dist-info/METADATA +47 -0
- spotforecast2-0.0.1.dist-info/RECORD +46 -0
- spotforecast2-0.0.1.dist-info/WHEEL +4 -0
spotforecast2/forecaster/recursive/_forecaster_recursive.py
@@ -0,0 +1,939 @@
from __future__ import annotations
from typing import Callable, Union, List, Optional, Tuple, Dict
import sys
import numpy as np
import pandas as pd
from copy import copy

from spotforecast2.forecaster.base import ForecasterBase
from spotforecast2.preprocessing import TimeSeriesDifferentiator, QuantileBinner
from spotforecast2.utils import (
    check_y,
    check_exog,
    get_exog_dtypes,
    input_to_frame,
    initialize_lags,
    expand_index,
    initialize_weights,
    check_select_fit_kwargs,
    check_exog_dtypes,
    check_predict_input,
    transform_dataframe,
)
from spotforecast2.forecaster.utils import (
    initialize_window_features,
    check_extract_values_and_index,
    get_style_repr_html,
    initialize_estimator,
)

# Version handling - placeholder if not defined
try:
    from spotforecast2 import __version__
except ImportError:
    __version__ = "0.0.1"

class ForecasterRecursive(ForecasterBase):
    """
    This class turns any estimator compatible with the scikit-learn API into a
    recursive autoregressive (multi-step) forecaster.
    """

    def __init__(
        self,
        estimator: object = None,
        lags: Union[int, List[int], np.ndarray, range, None] = None,
        window_features: Union[object, List[object], None] = None,
        transformer_y: Optional[object] = None,
        transformer_exog: Optional[object] = None,
        weight_func: Optional[Callable] = None,
        differentiation: Optional[int] = None,
        fit_kwargs: Optional[Dict[str, object]] = None,
        binner_kwargs: Optional[Dict[str, object]] = None,
        forecaster_id: Union[str, int, None] = None,
        regressor: object = None,
    ) -> None:

        self.estimator = copy(initialize_estimator(estimator, regressor))
        self.transformer_y = transformer_y
        self.transformer_exog = transformer_exog
        self.weight_func = weight_func
        self.source_code_weight_func = None
        self.differentiation = differentiation
        self.differentiation_max = None
        self.differentiator = None
        self.last_window_ = None
        self.index_type_ = None
        self.index_freq_ = None
        self.training_range_ = None
        self.series_name_in_ = None
        self.exog_in_ = False
        self.exog_names_in_ = None
        self.exog_type_in_ = None
        self.exog_dtypes_in_ = None
        self.exog_dtypes_out_ = None
        self.X_train_window_features_names_out_ = None
        self.X_train_exog_names_out_ = None
        self.X_train_features_names_out_ = None
        self.in_sample_residuals_ = None
        self.out_sample_residuals_ = None
        self.in_sample_residuals_by_bin_ = None
        self.out_sample_residuals_by_bin_ = None
        self.creation_date = pd.Timestamp.today().strftime("%Y-%m-%d %H:%M:%S")
        self.is_fitted = False
        self.fit_date = None
        self.spotforecast_version = __version__
        self.python_version = sys.version.split(" ")[0]
        self.forecaster_id = forecaster_id
        self._probabilistic_mode = "binned"

        (
            self.lags,
            self.lags_names,
            self.max_lag,
        ) = initialize_lags(type(self).__name__, lags)
        (
            self.window_features,
            self.window_features_names,
            self.max_size_window_features,
        ) = initialize_window_features(window_features)
        if self.window_features is None and self.lags is None:
            raise ValueError(
                "At least one of the arguments `lags` or `window_features` "
                "must be different from None. This is required to create the "
                "predictors used in training the forecaster."
            )

        self.window_size = max(
            [
                ws
                for ws in [self.max_lag, self.max_size_window_features]
                if ws is not None
            ]
        )
        self.window_features_class_names = None
        if window_features is not None:
            self.window_features_class_names = [
                type(wf).__name__ for wf in self.window_features
            ]

        self.weight_func, self.source_code_weight_func, _ = initialize_weights(
            forecaster_name=type(self).__name__,
            estimator=estimator,
            weight_func=weight_func,
            series_weights=None,
        )

        if differentiation is not None:
            if not isinstance(differentiation, int) or differentiation < 1:
                raise ValueError(
                    f"Argument `differentiation` must be an integer equal to or "
                    f"greater than 1. Got {differentiation}."
                )
            self.differentiation = differentiation
            self.differentiation_max = differentiation
            self.window_size += differentiation
            # TimeSeriesDifferentiator only takes the differentiation order.
            self.differentiator = TimeSeriesDifferentiator(order=differentiation)

        self.fit_kwargs = check_select_fit_kwargs(
            estimator=estimator, fit_kwargs=fit_kwargs
        )

        self.binner_kwargs = binner_kwargs
        if binner_kwargs is None:
            self.binner_kwargs = {
                "n_bins": 10,
                "method": "linear",
            }
        self.binner = QuantileBinner(**self.binner_kwargs)
        self.binner_intervals_ = None

        self.__spotforecast_tags__ = {
            "library": "spotforecast",
            "forecaster_name": "ForecasterRecursive",
            "forecaster_task": "regression",
            "forecasting_scope": "single-series",  # single-series | global
            "forecasting_strategy": "recursive",  # recursive | direct | deep_learning
            "index_types_supported": ["pandas.RangeIndex", "pandas.DatetimeIndex"],
            "requires_index_frequency": True,
            "allowed_input_types_series": ["pandas.Series"],
            "supports_exog": True,
            "allowed_input_types_exog": ["pandas.Series", "pandas.DataFrame"],
            "handles_missing_values_series": False,
            "handles_missing_values_exog": True,
            "supports_lags": True,
            "supports_window_features": True,
            "supports_transformer_series": True,
            "supports_transformer_exog": True,
            "supports_weight_func": True,
            "supports_differentiation": True,
            "prediction_types": [
                "point",
                "interval",
                "bootstrapping",
                "quantiles",
                "distribution",
            ],
            "supports_probabilistic": True,
            "probabilistic_methods": ["bootstrapping", "conformal"],
            "handles_binned_residuals": True,
        }

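The effective `window_size` is the largest look-back any predictor needs, plus the differentiation order when differencing is enabled. A minimal sketch of the arithmetic (annotation, not part of the wheel), assuming scikit-learn's `Ridge` as the estimator and the class above in scope:

# --- example: window_size composition (illustration only) ---
from sklearn.linear_model import Ridge

# max_lag = 24, no window features, first-order differencing:
# window_size = max(24) + 1 = 25 past observations are needed
# before the first training row can be built.
forecaster = ForecasterRecursive(
    estimator=Ridge(),
    lags=24,
    differentiation=1,
)
print(forecaster.window_size)  # 25
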
    def __repr__(self) -> str:
        """
        Information displayed when a ForecasterRecursive object is printed.
        """

        params = (
            self.estimator.get_params() if hasattr(self.estimator, "get_params") else {}
        )
        exog_names_in_ = self.exog_names_in_ if self.exog_in_ else None

        info = (
            f"{'=' * len(type(self).__name__)} \n"
            f"{type(self).__name__} \n"
            f"{'=' * len(type(self).__name__)} \n"
            f"Estimator: {type(self.estimator).__name__} \n"
            f"Lags: {self.lags} \n"
            f"Window features: {self.window_features_names} \n"
            f"Window size: {self.window_size} \n"
            f"Series name: {self.series_name_in_} \n"
            f"Exogenous included: {self.exog_in_} \n"
            f"Exogenous names: {exog_names_in_} \n"
            f"Transformer for y: {self.transformer_y} \n"
            f"Transformer for exog: {self.transformer_exog} \n"
            f"Weight function included: {self.weight_func is not None} \n"
            f"Differentiation order: {self.differentiation} \n"
            f"Training range: {self.training_range_.to_list() if self.is_fitted else None} \n"
            f"Training index type: {str(self.index_type_).split('.')[-1][:-2] if self.is_fitted else None} \n"
            f"Training index frequency: {self.index_freq_ if self.is_fitted else None} \n"
            f"Estimator parameters: {params} \n"
            f"fit_kwargs: {self.fit_kwargs} \n"
            f"Creation date: {self.creation_date} \n"
            f"Last fit date: {self.fit_date} \n"
            f"spotforecast version: {self.spotforecast_version} \n"
            f"Python version: {self.python_version} \n"
            f"Forecaster id: {self.forecaster_id} \n"
        )

        return info

    def _repr_html_(self) -> str:
        """
        HTML representation of the object.
        The "General Information" section is expanded by default.
        """

        params = (
            self.estimator.get_params() if hasattr(self.estimator, "get_params") else {}
        )
        exog_names_in_ = self.exog_names_in_ if self.exog_in_ else None

        style, unique_id = get_style_repr_html(self.is_fitted)

        content = f"""
        <div class="container-{unique_id}">
            <p style="font-size: 1.5em; font-weight: bold; margin-block-start: 0.83em; margin-block-end: 0.83em;">{type(self).__name__}</p>
            <details open>
                <summary>General Information</summary>
                <ul>
                    <li><strong>Estimator:</strong> {type(self.estimator).__name__}</li>
                    <li><strong>Lags:</strong> {self.lags}</li>
                    <li><strong>Window features:</strong> {self.window_features_names}</li>
                    <li><strong>Window size:</strong> {self.window_size}</li>
                    <li><strong>Series name:</strong> {self.series_name_in_}</li>
                    <li><strong>Exogenous included:</strong> {self.exog_in_}</li>
                    <li><strong>Weight function included:</strong> {self.weight_func is not None}</li>
                    <li><strong>Differentiation order:</strong> {self.differentiation}</li>
                    <li><strong>Creation date:</strong> {self.creation_date}</li>
                    <li><strong>Last fit date:</strong> {self.fit_date}</li>
                    <li><strong>spotforecast version:</strong> {self.spotforecast_version}</li>
                    <li><strong>Python version:</strong> {self.python_version}</li>
                    <li><strong>Forecaster id:</strong> {self.forecaster_id}</li>
                </ul>
            </details>
            <details>
                <summary>Exogenous Variables</summary>
                <ul>
                    {exog_names_in_}
                </ul>
            </details>
            <details>
                <summary>Data Transformations</summary>
                <ul>
                    <li><strong>Transformer for y:</strong> {self.transformer_y}</li>
                    <li><strong>Transformer for exog:</strong> {self.transformer_exog}</li>
                </ul>
            </details>
            <details>
                <summary>Training Information</summary>
                <ul>
                    <li><strong>Training range:</strong> {self.training_range_.to_list() if self.is_fitted else 'Not fitted'}</li>
                    <li><strong>Training index type:</strong> {str(self.index_type_).split('.')[-1][:-2] if self.is_fitted else 'Not fitted'}</li>
                    <li><strong>Training index frequency:</strong> {self.index_freq_ if self.is_fitted else 'Not fitted'}</li>
                </ul>
            </details>
            <details>
                <summary>Estimator Parameters</summary>
                <ul>
                    {params}
                </ul>
            </details>
            <details>
                <summary>Fit Kwargs</summary>
                <ul>
                    {self.fit_kwargs}
                </ul>
            </details>
        </div>
        """

        return style + content

    def __setstate__(self, state: dict) -> None:
        """Custom __setstate__ to ensure backward compatibility when unpickling."""
        super().__setstate__(state)
        if not hasattr(self, "_ForecasterRecursive__spotforecast_tags__"):
            self.__spotforecast_tags__ = {
                "library": "spotforecast",
                "forecaster_name": "ForecasterRecursive",
                "forecaster_task": "regression",
                "forecasting_scope": "single-series",
                "forecasting_strategy": "recursive",
                "index_types_supported": ["pandas.RangeIndex", "pandas.DatetimeIndex"],
                "requires_index_frequency": True,
                "allowed_input_types_series": ["pandas.Series"],
                "supports_exog": True,
                "allowed_input_types_exog": ["pandas.Series", "pandas.DataFrame"],
                "handles_missing_values_series": False,
                "handles_missing_values_exog": True,
                "supports_lags": True,
                "supports_window_features": True,
                "supports_transformer_series": True,
                "supports_transformer_exog": True,
                "supports_weight_func": True,
                "supports_differentiation": True,
                "prediction_types": [
                    "point",
                    "interval",
                    "bootstrapping",
                    "quantiles",
                    "distribution",
                ],
                "supports_probabilistic": True,
                "probabilistic_methods": ["bootstrapping", "conformal"],
                "handles_binned_residuals": True,
            }

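`__setstate__` back-fills the tag dictionary when unpickling objects saved by older versions. A round-trip sketch (annotation, not part of the wheel), assuming `ForecasterBase` provides a compatible `__setstate__` and the class above in scope; note the attribute is name-mangled because of its double-underscore prefix:

# --- example: pickle round trip (illustration only) ---
import pickle
from sklearn.linear_model import Ridge

forecaster = ForecasterRecursive(estimator=Ridge(), lags=7)
blob = pickle.dumps(forecaster)
restored = pickle.loads(blob)  # __setstate__ runs here

# Double leading underscores mangle the attribute name:
tags = restored._ForecasterRecursive__spotforecast_tags__
print(tags["forecasting_strategy"])  # "recursive"
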
    def _create_lags(
        self,
        y: np.ndarray,
        X_as_pandas: bool = False,
        train_index: Optional[pd.Index] = None,
    ) -> Tuple[Optional[Union[np.ndarray, pd.DataFrame]], np.ndarray]:
        X_data = None
        if self.lags is not None:
            # `y` is expected to be a raveled 1D array here. Build the lag
            # matrix with a strided sliding window; the last window is dropped
            # because it has no target observation to its right.
            y_strided = np.lib.stride_tricks.sliding_window_view(y, self.window_size)[
                :-1
            ]
            X_data = y_strided[:, self.window_size - self.lags]

            if X_as_pandas:
                X_data = pd.DataFrame(
                    data=X_data, columns=self.lags_names, index=train_index
                )

        y_data = y[self.window_size :]

        return X_data, y_data

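`_create_lags` builds the lag matrix with a strided view: row i holds the `window_size` observations preceding target `y[i + window_size]`, and indexing with `window_size - lags` picks the requested lags in order. A standalone illustration of the same trick (annotation, not part of the wheel):

# --- example: sliding-window lag matrix (illustration only) ---
import numpy as np

y = np.arange(10.0)          # toy series: 0, 1, ..., 9
window_size = 3
lags = np.array([1, 2, 3])

# Rows are consecutive windows of length `window_size`; the last window
# is dropped because it has no target to its right.
windows = np.lib.stride_tricks.sliding_window_view(y, window_size)[:-1]

X = windows[:, window_size - lags]  # columns: lag 1, lag 2, lag 3
y_target = y[window_size:]

print(X[0], y_target[0])  # [2. 1. 0.] 3.0 -> the lags of y[3]
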
    def _create_window_features(
        self,
        y: pd.Series,
        train_index: pd.Index,
        X_as_pandas: bool = False,
    ) -> Tuple[List[Union[np.ndarray, pd.DataFrame]], List[str]]:

        len_train_index = len(train_index)
        X_train_window_features = []
        X_train_window_features_names_out_ = []
        for wf in self.window_features:
            X_train_wf = wf.transform_batch(y)
            if not isinstance(X_train_wf, pd.DataFrame):
                raise TypeError(
                    f"The method `transform_batch` of {type(wf).__name__} "
                    f"must return a pandas DataFrame."
                )
            X_train_wf = X_train_wf.iloc[-len_train_index:]
            if not len(X_train_wf) == len_train_index:
                raise ValueError(
                    f"The method `transform_batch` of {type(wf).__name__} "
                    f"must return a DataFrame with the same number of rows as "
                    f"the input time series - `window_size`: {len_train_index}."
                )
            if not (X_train_wf.index == train_index).all():
                raise ValueError(
                    f"The method `transform_batch` of {type(wf).__name__} "
                    f"must return a DataFrame with the same index as "
                    f"the input time series - `window_size`."
                )

            X_train_window_features_names_out_.extend(X_train_wf.columns)
            if not X_as_pandas:
                X_train_wf = X_train_wf.to_numpy()
            X_train_window_features.append(X_train_wf)

        return X_train_window_features, X_train_window_features_names_out_

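`_create_window_features` only relies on a small contract: `transform_batch` must return a DataFrame whose tail aligns with the training index, and, for the recursive prediction path shown further below, `transform` must accept a NumPy window. A sketch of a conforming feature (annotation, not part of the wheel); any extra attributes `initialize_window_features` may expect are not visible in this hunk and are assumptions here:

# --- example: minimal window feature honouring the contract (illustration only) ---
import numpy as np
import pandas as pd

class RollingMeanFeature:
    """Illustrative window feature matching the checks enforced above."""

    def __init__(self, window_size: int = 3):
        self.window_size = window_size
        # Assumed attributes; `initialize_window_features` may require
        # different names, which this diff does not show.
        self.window_sizes = window_size
        self.features_names = [f"roll_mean_{window_size}"]

    def transform_batch(self, y: pd.Series) -> pd.DataFrame:
        # Batch path (training): DataFrame aligned with the tail of `y`.
        return y.rolling(self.window_size).mean().to_frame(self.features_names[0])

    def transform(self, y: np.ndarray) -> np.ndarray:
        # Recursive path (prediction): only the last row is consumed.
        return np.array([[y[-self.window_size:].mean()]])
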
    def _create_train_X_y(
        self, y: pd.Series, exog: Union[pd.Series, pd.DataFrame, None] = None
    ) -> Tuple[
        pd.DataFrame,
        pd.Series,
        List[str],
        List[str],
        List[str],
        List[str],
        Dict[str, type],
        Dict[str, type],
    ]:

        check_y(y=y)
        y = input_to_frame(data=y, input_name="y")

        if len(y) <= self.window_size:
            raise ValueError(
                f"Length of `y` must be greater than the maximum window size "
                f"needed by the forecaster.\n"
                f"  Length `y`: {len(y)}.\n"
                f"  Max window size: {self.window_size}.\n"
                f"  Lags window size: {self.max_lag}.\n"
                f"  Window features window size: {self.max_size_window_features}."
            )

        fit_transformer = not self.is_fitted
        y = transform_dataframe(
            df=y,
            transformer=self.transformer_y,
            fit=fit_transformer,
            inverse_transform=False,
        )
        y_values, y_index = check_extract_values_and_index(data=y, data_label="`y`")
        if y_values.ndim == 2 and y_values.shape[1] == 1:
            y_values = y_values.ravel()
        train_index = y_index[self.window_size :]

        if self.differentiation is not None:
            if not self.is_fitted:
                self.differentiator.fit(y_values)  # differentiator must be fitted first
                y_values = self.differentiator.transform(y_values)
            else:
                differentiator = copy(self.differentiator)
                y_values = differentiator.transform(y_values)

        exog_names_in_ = None
        exog_dtypes_in_ = None
        exog_dtypes_out_ = None
        X_as_pandas = False
        if exog is not None:
            check_exog(exog=exog, allow_nan=True)
            exog = input_to_frame(data=exog, input_name="exog")
            _, exog_index = check_extract_values_and_index(
                data=exog, data_label="`exog`", ignore_freq=True, return_values=False
            )

            # Differencing preserves the series length (it writes NaN into the
            # first `order` positions), so `exog` can still be aligned with
            # `y` by length alone.
            len_exog = len(exog)

            exog_names_in_ = exog.columns.to_list()
            exog_dtypes_in_ = get_exog_dtypes(exog=exog)

            exog = transform_dataframe(
                df=exog,
                transformer=self.transformer_exog,
                fit=fit_transformer,
                inverse_transform=False,
            )

            check_exog_dtypes(exog, call_check_exog=True)
            exog_dtypes_out_ = get_exog_dtypes(exog=exog)
            X_as_pandas = any(
                not pd.api.types.is_numeric_dtype(dtype)
                or pd.api.types.is_bool_dtype(dtype)
                for dtype in set(exog.dtypes)
            )

            # Alignment: when `exog` covers the same span as `y`, drop the
            # first `window_size` rows so it lines up with the training rows;
            # otherwise it is assumed to start already aligned.
            if len_exog == len(y):
                exog = exog.iloc[self.window_size :]

        X_train = []
        X_train_features_names_out_ = []

        # Create lag predictors. `y_values` may carry NaNs at the start when
        # differentiation is applied.
        X_train_lags, y_train = self._create_lags(
            y=y_values, X_as_pandas=X_as_pandas, train_index=train_index
        )
        if X_train_lags is not None:
            X_train.append(X_train_lags)
            X_train_features_names_out_.extend(self.lags_names)

        X_train_window_features_names_out_ = None
        if self.window_features is not None:
            n_diff = 0 if self.differentiation is None else self.differentiation
            if isinstance(y_values, pd.Series):
                y_vals_for_wf = y_values.iloc[n_diff:]
            else:
                y_vals_for_wf = y_values[n_diff:]
            y_index_for_wf = y_index[n_diff:]

            y_window_features = pd.Series(y_vals_for_wf, index=y_index_for_wf)
            X_train_window_features, X_train_window_features_names_out_ = (
                self._create_window_features(
                    y=y_window_features,
                    X_as_pandas=X_as_pandas,
                    train_index=train_index,
                )
            )
            X_train.extend(X_train_window_features)
            X_train_features_names_out_.extend(X_train_window_features_names_out_)

        X_train_exog_names_out_ = None
        if exog is not None:
            X_train_exog_names_out_ = exog.columns.to_list()
            if not X_as_pandas:
                exog = exog.to_numpy()
            X_train_features_names_out_.extend(X_train_exog_names_out_)
            X_train.append(exog)

        if len(X_train) == 1:
            X_train = X_train[0]
        else:
            if X_as_pandas:
                X_train = pd.concat(X_train, axis=1)
            else:
                X_train = np.concatenate(X_train, axis=1)

        if X_as_pandas:
            X_train.index = train_index
        else:
            X_train = pd.DataFrame(
                data=X_train, index=train_index, columns=X_train_features_names_out_
            )

        y_train = pd.Series(data=y_train, index=train_index, name="y")

        return (
            X_train,
            y_train,
            exog_names_in_,
            X_train_window_features_names_out_,
            X_train_exog_names_out_,
            X_train_features_names_out_,
            exog_dtypes_in_,
            exog_dtypes_out_,
        )

    def create_train_X_y(
        self, y: pd.Series, exog: Union[pd.Series, pd.DataFrame, None] = None
    ) -> Tuple[
        pd.DataFrame,
        pd.Series,
        List[str],
        List[str],
        List[str],
        List[str],
        Dict[str, type],
        Dict[str, type],
    ]:
        """Public wrapper around `_create_train_X_y`: build the training
        matrices from a series and optional exogenous variables."""
        return self._create_train_X_y(y=y, exog=exog)

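A quick look at the matrices this produces (annotation, not part of the wheel), assuming scikit-learn's `Ridge` and the class above in scope:

# --- example: training matrices (illustration only) ---
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge

y = pd.Series(
    np.random.default_rng(0).normal(size=48),
    index=pd.date_range("2024-01-01", periods=48, freq="h"),
    name="load",
)

forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
X_train, y_train, *_ = forecaster.create_train_X_y(y=y)

print(X_train.shape)  # (45, 3): 48 observations minus window_size = 3 rows
print(y_train.index.equals(X_train.index))  # True: targets align with rows
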
    def _train_test_split_one_step_ahead(
        self,
        y: pd.Series,
        initial_train_size: int,
        exog: Union[pd.Series, pd.DataFrame, None] = None,
    ) -> Tuple[pd.DataFrame, pd.Series, pd.DataFrame, pd.Series]:
        """
        Create matrices needed to train and test the forecaster for one-step-ahead
        predictions.

        Args:
            y: Training time series.
            initial_train_size: Initial size of the training set. It is the number of
                observations used to train the forecaster before making the first
                prediction.
            exog: Exogenous variable/s included as predictor/s. Must have the same
                number of observations as y and their indexes must be aligned.
                Defaults to None.

        Returns:
            Tuple containing:
                - X_train: Predictor values used to train the model as pandas DataFrame.
                - y_train: Values of the time series related to each row of X_train as
                  pandas Series.
                - X_test: Predictor values used to test the model as pandas DataFrame.
                - y_test: Values of the time series related to each row of X_test as
                  pandas Series.

        """

        is_fitted = self.is_fitted
        self.is_fitted = False
        X_train, y_train, *_ = self._create_train_X_y(
            y=y.iloc[:initial_train_size],
            exog=exog.iloc[:initial_train_size] if exog is not None else None,
        )

        # Start the test slice `window_size` observations early so the first
        # test row can be built from lags that fall inside the training span.
        test_init = initial_train_size - self.window_size
        self.is_fitted = True
        X_test, y_test, *_ = self._create_train_X_y(
            y=y.iloc[test_init:],
            exog=exog.iloc[test_init:] if exog is not None else None,
        )

        self.is_fitted = is_fitted

        return X_train, y_train, X_test, y_test

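The `test_init = initial_train_size - window_size` offset is easiest to see with concrete numbers. A sketch (annotation, not part of the wheel), assuming the class above in scope:

# --- example: one-step-ahead split geometry (illustration only) ---
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge

y = pd.Series(np.arange(100.0), index=pd.RangeIndex(100))
forecaster = ForecasterRecursive(estimator=Ridge(), lags=24)

X_tr, y_tr, X_te, y_te = forecaster._train_test_split_one_step_ahead(
    y=y, initial_train_size=80
)
print(len(y_tr), len(y_te))  # 56 20: train targets y[24:80], test targets y[80:100]
print(y_te.index[0])         # 80: the first 24 test-slice points only seed the lags
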
    def get_params(self, deep=True):
        params = {}
        for key in [
            "estimator",
            "lags",
            "window_features",
            "transformer_y",
            "transformer_exog",
            "weight_func",
            "differentiation",
            "fit_kwargs",
            "binner_kwargs",
            "forecaster_id",
        ]:
            if hasattr(self, key):
                params[key] = getattr(self, key)

        if not deep:
            return params

        if hasattr(self, "estimator") and self.estimator is not None:
            if hasattr(self.estimator, "get_params"):
                for key, value in self.estimator.get_params(deep=True).items():
                    params[f"estimator__{key}"] = value

        return params

    def set_params(self, **params):
        if not params:
            return self

        valid_params = self.get_params(deep=True)
        nested_params = {}

        for key, value in params.items():
            # Keys absent from `valid_params` are accepted without error;
            # validation is intentionally relaxed here.
            if "__" in key:
                obj_name, param_name = key.split("__", 1)
                if obj_name not in nested_params:
                    nested_params[obj_name] = {}
                nested_params[obj_name][param_name] = value
            else:
                setattr(self, key, value)

        for obj_name, obj_params in nested_params.items():
            if hasattr(self, obj_name):
                obj = getattr(self, obj_name)
                if hasattr(obj, "set_params"):
                    obj.set_params(**obj_params)
                else:
                    for param_name, value in obj_params.items():
                        setattr(obj, param_name, value)

        return self

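Nested parameters follow the scikit-learn double-underscore convention: `estimator__<param>` is routed to the wrapped estimator via its own `set_params`, while plain keys are set on the forecaster itself. A sketch (annotation, not part of the wheel), assuming the class above in scope:

# --- example: nested parameter routing (illustration only) ---
from sklearn.linear_model import Ridge

forecaster = ForecasterRecursive(estimator=Ridge(alpha=1.0), lags=12)

forecaster.set_params(estimator__alpha=0.1, forecaster_id="ridge-12")
print(forecaster.get_params()["estimator__alpha"])  # 0.1
print(forecaster.forecaster_id)                     # "ridge-12"
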
    def fit(
        self,
        y: pd.Series,
        exog: Union[pd.Series, pd.DataFrame, None] = None,
        store_last_window: bool = True,
        store_in_sample_residuals: bool = False,
        random_state: int = 123,
        suppress_warnings: bool = False,
    ) -> None:

        # NOTE: `store_last_window`, `store_in_sample_residuals`,
        # `random_state` and `suppress_warnings` are accepted for API
        # compatibility but are not yet honoured below: the last window and
        # the in-sample residuals are always stored, with a fixed seed of 123.

        # Reset values
        self.is_fitted = False
        self.fit_date = None

        (
            X_train,
            y_train,
            exog_names_in_,
            X_train_window_features_names_out_,
            X_train_exog_names_out_,
            X_train_features_names_out_,
            exog_dtypes_in_,
            exog_dtypes_out_,
        ) = self._create_train_X_y(y=y, exog=exog)

        SAMPLE_WEIGHT_NAME = "sample_weight"
        if self.weight_func is not None:
            sample_weight, _, _ = initialize_weights(
                forecaster_name=type(self).__name__,
                estimator=self.estimator,
                weight_func=self.weight_func,
                series_weights=None,
            )
            sample_weight = sample_weight(y.index[self.window_size :])
            self.fit_kwargs[SAMPLE_WEIGHT_NAME] = sample_weight

        self.estimator.fit(X=X_train, y=y_train, **self.fit_kwargs)

        if SAMPLE_WEIGHT_NAME in self.fit_kwargs:
            del self.fit_kwargs[SAMPLE_WEIGHT_NAME]

        # Store attributes
        self.last_window_ = y.iloc[-self.window_size :].copy()
        self.index_type_ = type(y.index)
        if isinstance(y.index, pd.DatetimeIndex):
            self.index_freq_ = y.index.freqstr
        else:
            try:
                self.index_freq_ = y.index.step
            except AttributeError:
                self.index_freq_ = None

        self.training_range_ = y.index[[0, -1]]
        self.series_name_in_ = y.name
        self.exog_in_ = exog is not None
        self.exog_names_in_ = exog_names_in_
        self.exog_type_in_ = type(exog) if exog is not None else None
        self.exog_dtypes_in_ = exog_dtypes_in_
        self.exog_dtypes_out_ = exog_dtypes_out_
        self.X_train_window_features_names_out_ = X_train_window_features_names_out_
        self.X_train_exog_names_out_ = X_train_exog_names_out_
        self.X_train_features_names_out_ = X_train_features_names_out_
        self.is_fitted = True
        self.fit_date = pd.Timestamp.today().strftime("%Y-%m-%d %H:%M:%S")

        # In-sample residuals, subsampled to at most 1000 values.
        residuals = y_train - self.estimator.predict(X_train)

        if len(residuals) > 1000:
            rng = np.random.default_rng(seed=123)
            residuals = rng.choice(residuals, size=1000, replace=False)

        self.in_sample_residuals_ = residuals

        if self.binner_kwargs is not None:
            self.binner = QuantileBinner(**self.binner_kwargs)
            if isinstance(residuals, pd.Series):
                residuals = residuals.to_numpy()
            self.binner.fit(residuals)

            # Construct intervals_ manually if the binner does not expose them
            if hasattr(self.binner, "intervals_"):
                self.binner_intervals_ = self.binner.intervals_
            else:
                self.binner_intervals_ = {
                    i: (self.binner.bins_[i - 1], self.binner.bins_[i])
                    for i in range(1, len(self.binner.bins_))
                }

            residuals_binned = self.binner.transform(residuals)
            self.in_sample_residuals_by_bin_ = {
                bin: residuals[residuals_binned == bin]
                for bin in self.binner_intervals_.keys()
            }

            # Limit residuals stored per bin
            max_residuals_per_bin = 1000 // self.binner.n_bins
            for bin, res in self.in_sample_residuals_by_bin_.items():
                if len(res) > max_residuals_per_bin:
                    rng = np.random.default_rng(seed=123)
                    self.in_sample_residuals_by_bin_[bin] = rng.choice(
                        res, size=max_residuals_per_bin, replace=False
                    )

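After fitting, the residual machinery can be inspected directly: residuals are pooled (capped at 1000) and also grouped by quantile bins of the residual distribution, which is what the binned-residuals features consume. A sketch (annotation, not part of the wheel), assuming scikit-learn's `Ridge` and the class above in scope:

# --- example: inspecting binned residuals (illustration only) ---
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge

y = pd.Series(
    np.random.default_rng(1).normal(size=500),
    index=pd.date_range("2024-01-01", periods=500, freq="h"),
)
forecaster = ForecasterRecursive(estimator=Ridge(), lags=24)
forecaster.fit(y=y)

print(len(forecaster.in_sample_residuals_))  # 476 residuals (500 - window_size)
print({b: len(r) for b, r in forecaster.in_sample_residuals_by_bin_.items()})
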
    def _create_predict_inputs(
        self,
        steps: int,
        last_window: Union[pd.Series, pd.DataFrame, None] = None,
        exog: Union[pd.Series, pd.DataFrame, None] = None,
        check_inputs: bool = True,
    ) -> Tuple[np.ndarray, Union[np.ndarray, None], pd.Index, pd.Index]:

        if last_window is None:
            last_window = self.last_window_

        if check_inputs:
            check_predict_input(
                forecaster_name=type(self).__name__,
                steps=steps,
                is_fitted=self.is_fitted,
                exog_in_=self.exog_in_,
                index_type_=self.index_type_,
                index_freq_=self.index_freq_,
                window_size=self.window_size,
                last_window=last_window,
                last_window_exog=None,
                exog=exog,
                exog_names_in_=self.exog_names_in_,
                interval=None,
            )

        last_window = input_to_frame(data=last_window, input_name="last_window")
        _, last_window_index = check_extract_values_and_index(
            data=last_window,
            data_label="`last_window`",
            ignore_freq=True,
            return_values=False,
        )

        prediction_index = expand_index(index=last_window_index, steps=steps)

        last_window = transform_dataframe(
            df=last_window,
            transformer=self.transformer_y,
            fit=False,
            inverse_transform=False,
        )
        last_window_values, _ = check_extract_values_and_index(
            data=last_window, data_label="`last_window`"
        )
        last_window_values = last_window_values.ravel()

        if self.differentiation is not None:
            last_window_values = self.differentiator.fit_transform(last_window_values)

        exog_values = None
        exog_index = None

        if exog is not None:
            exog = input_to_frame(data=exog, input_name="exog")
            exog = transform_dataframe(
                df=exog,
                transformer=self.transformer_exog,
                fit=False,
                inverse_transform=False,
            )

            exog_values, exog_index = check_extract_values_and_index(
                data=exog, data_label="`exog`"
            )

            exog_values = (
                exog_values if isinstance(exog, pd.Series) else exog.to_numpy()
            )

        return last_window_values, exog_values, prediction_index, exog_index

    def _recursive_predict(
        self,
        steps: int,
        last_window_values: np.ndarray,
        exog_values: Union[np.ndarray, None] = None,
    ) -> np.ndarray:

        predictions = np.full(shape=steps, fill_value=np.nan)

        for step in range(steps):

            X_gen = []

            if self.lags is not None:
                X_lags = last_window_values[-self.lags]
                if X_lags.ndim == 1:
                    X_lags = X_lags.reshape(1, -1)
                X_gen.append(X_lags)

            if self.window_features is not None:
                X_window_features = []
                for wf in self.window_features:
                    wf_values = wf.transform(last_window_values)
                    X_window_features.append(wf_values[-1:])

                X_window_features = np.concatenate(X_window_features, axis=1)
                X_gen.append(X_window_features)

            if self.exog_in_:
                X_exog = exog_values[step]
                if X_exog.ndim < 2:
                    X_exog = X_exog.reshape(1, -1)
                X_gen.append(X_exog)

            X_gen = np.concatenate(X_gen, axis=1)

            # Convert to DataFrame with feature names to avoid sklearn warning
            if self.X_train_features_names_out_ is not None:
                X_gen = pd.DataFrame(X_gen, columns=self.X_train_features_names_out_)

            pred = self.estimator.predict(X_gen)
            predictions[step] = pred[0]

            last_window_values = np.append(last_window_values, pred)

        return predictions

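The recursion above is the core of the class: each prediction is appended to the window, so later steps consume earlier predictions as lags. A stripped-down illustration (annotation, not part of the wheel), with a hypothetical stand-in `f` for `estimator.predict`:

# --- example: the recursive loop, reduced to its essence (illustration only) ---
import numpy as np

def f(lags):
    # Hypothetical model: the mean of the lag values.
    return lags.mean()

window = np.array([1.0, 2.0, 3.0])
lags = np.array([1, 2, 3])  # use the last three observations

preds = []
for _ in range(4):
    pred = f(window[-lags])           # lags come from the growing window
    preds.append(pred)
    window = np.append(window, pred)  # the prediction becomes a future lag

print(np.round(preds, 3))  # step 2 onwards is built on step 1's output
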
    def predict(
        self,
        steps: int,
        last_window: Union[pd.Series, pd.DataFrame, None] = None,
        exog: Union[pd.Series, pd.DataFrame, None] = None,
        check_inputs: bool = True,
    ) -> pd.Series:

        last_window_values, exog_values, prediction_index, _ = (
            self._create_predict_inputs(
                steps=steps,
                last_window=last_window,
                exog=exog,
                check_inputs=check_inputs,
            )
        )

        predictions = self._recursive_predict(
            steps=steps, last_window_values=last_window_values, exog_values=exog_values
        )

        if self.differentiation is not None:
            predictions = self.differentiator.inverse_transform_next_window(predictions)

        predictions = transform_dataframe(
            df=pd.Series(predictions, name="pred").to_frame(),
            transformer=self.transformer_y,
            fit=False,
            inverse_transform=True,
        )

        predictions = predictions.iloc[:, 0]
        predictions.index = prediction_index

        return predictions