spotforecast2-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. spotforecast2/.DS_Store +0 -0
  2. spotforecast2/__init__.py +2 -0
  3. spotforecast2/data/__init__.py +0 -0
  4. spotforecast2/data/data.py +130 -0
  5. spotforecast2/data/fetch_data.py +209 -0
  6. spotforecast2/exceptions.py +681 -0
  7. spotforecast2/forecaster/.DS_Store +0 -0
  8. spotforecast2/forecaster/__init__.py +7 -0
  9. spotforecast2/forecaster/base.py +448 -0
  10. spotforecast2/forecaster/metrics.py +527 -0
  11. spotforecast2/forecaster/recursive/__init__.py +4 -0
  12. spotforecast2/forecaster/recursive/_forecaster_equivalent_date.py +1075 -0
  13. spotforecast2/forecaster/recursive/_forecaster_recursive.py +939 -0
  14. spotforecast2/forecaster/recursive/_warnings.py +15 -0
  15. spotforecast2/forecaster/utils.py +954 -0
  16. spotforecast2/model_selection/__init__.py +5 -0
  17. spotforecast2/model_selection/bayesian_search.py +453 -0
  18. spotforecast2/model_selection/grid_search.py +314 -0
  19. spotforecast2/model_selection/random_search.py +151 -0
  20. spotforecast2/model_selection/split_base.py +357 -0
  21. spotforecast2/model_selection/split_one_step.py +245 -0
  22. spotforecast2/model_selection/split_ts_cv.py +634 -0
  23. spotforecast2/model_selection/utils_common.py +718 -0
  24. spotforecast2/model_selection/utils_metrics.py +103 -0
  25. spotforecast2/model_selection/validation.py +685 -0
  26. spotforecast2/preprocessing/__init__.py +30 -0
  27. spotforecast2/preprocessing/_binner.py +378 -0
  28. spotforecast2/preprocessing/_common.py +123 -0
  29. spotforecast2/preprocessing/_differentiator.py +123 -0
  30. spotforecast2/preprocessing/_rolling.py +136 -0
  31. spotforecast2/preprocessing/curate_data.py +254 -0
  32. spotforecast2/preprocessing/imputation.py +92 -0
  33. spotforecast2/preprocessing/outlier.py +114 -0
  34. spotforecast2/preprocessing/split.py +139 -0
  35. spotforecast2/py.typed +0 -0
  36. spotforecast2/utils/__init__.py +43 -0
  37. spotforecast2/utils/convert_to_utc.py +44 -0
  38. spotforecast2/utils/data_transform.py +208 -0
  39. spotforecast2/utils/forecaster_config.py +344 -0
  40. spotforecast2/utils/generate_holiday.py +70 -0
  41. spotforecast2/utils/validation.py +569 -0
  42. spotforecast2/weather/__init__.py +0 -0
  43. spotforecast2/weather/weather_client.py +288 -0
  44. spotforecast2-0.0.1.dist-info/METADATA +47 -0
  45. spotforecast2-0.0.1.dist-info/RECORD +46 -0
  46. spotforecast2-0.0.1.dist-info/WHEEL +4 -0
@@ -0,0 +1,30 @@
+ from .curate_data import (
+     get_start_end,
+     curate_holidays,
+     curate_weather,
+     basic_ts_checks,
+     agg_and_resample_data,
+ )
+ from .outlier import mark_outliers, manual_outlier_removal
+ from .imputation import custom_weights, get_missing_weights
+ from .split import split_abs_train_val_test, split_rel_train_val_test
+ from ._differentiator import TimeSeriesDifferentiator
+ from ._binner import QuantileBinner
+ from ._rolling import RollingFeatures
+
+ __all__ = [
+     "get_start_end",
+     "curate_holidays",
+     "curate_weather",
+     "basic_ts_checks",
+     "agg_and_resample_data",
+     "mark_outliers",
+     "manual_outlier_removal",
+     "custom_weights",
+     "get_missing_weights",
+     "split_abs_train_val_test",
+     "split_rel_train_val_test",
+     "TimeSeriesDifferentiator",
+     "QuantileBinner",
+     "RollingFeatures",
+ ]
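
For orientation, a minimal usage sketch of the re-exported preprocessing API (assuming this wheel is installed; the sample series is made up):

    import numpy as np
    from spotforecast2.preprocessing import QuantileBinner, TimeSeriesDifferentiator

    y = np.arange(100, dtype=float)  # toy series

    # Quantile binning: learn 4 data-driven bins, then assign bin indices.
    bins = QuantileBinner(n_bins=4).fit_transform(y)

    # First-order differencing; initial values are stored for later inversion.
    tsd = TimeSeriesDifferentiator(order=1)
    y_diff = tsd.fit(y).transform(y)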
@@ -0,0 +1,378 @@
+ """
+ QuantileBinner class for binning data into quantile-based bins.
+
+ This module contains the QuantileBinner class, which bins data into
+ quantile-based bins computed with numpy.percentile; bin assignment is
+ optimized with numpy.searchsorted.
+ """
+
+ from __future__ import annotations
+ import warnings
+ import numpy as np
+ from sklearn.base import BaseEstimator, TransformerMixin
+ from sklearn.exceptions import NotFittedError
+
+ from spotforecast2.exceptions import IgnoredArgumentWarning
+
+
+ class QuantileBinner(BaseEstimator, TransformerMixin):
+     """
+     Bin data into quantile-based bins using numpy.percentile.
+
+     This class is similar to sklearn's KBinsDiscretizer but optimized for
+     performance using numpy.searchsorted for fast bin assignment. Bin intervals
+     are defined following the convention: bins[i-1] <= x < bins[i]. Values
+     outside the range are clipped to the first or last bin.
+
+     Args:
+         n_bins: The number of quantile-based bins to create. Must be >= 2.
+         method: The method used to compute quantiles, passed to numpy.percentile.
+             Default is 'linear'. Valid values: "inverted_cdf",
+             "averaged_inverted_cdf", "closest_observation",
+             "interpolated_inverted_cdf", "hazen", "weibull", "linear",
+             "median_unbiased", "normal_unbiased".
+         subsample: Maximum number of samples used to compute quantiles. If the
+             dataset has more samples, a random subset is used. Default 200000.
+         dtype: Data type for bin indices. Default is numpy.float64.
+         random_state: Random seed for subset selection. Default 789654.
+
+     Attributes:
+         n_bins (int): Number of bins to create.
+         method (str): Quantile computation method.
+         subsample (int): Maximum samples for quantile computation.
+         dtype (type): Data type for bin indices.
+         random_state (int): Random seed.
+         n_bins_ (int): Actual number of bins after fitting (may differ from n_bins
+             if duplicate edges are found).
+         bin_edges_ (np.ndarray): Edges of the bins learned during fitting.
+         internal_edges_ (np.ndarray): Internal edges for optimized bin assignment.
+         intervals_ (dict): Mapping from bin index to (lower, upper) interval bounds.
+
+     Examples:
+         >>> import numpy as np
+         >>> from spotforecast2.preprocessing import QuantileBinner
+         >>>
+         >>> # Basic usage: create 3 quantile bins
+         >>> X = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
+         >>> binner = QuantileBinner(n_bins=3)
+         >>> _ = binner.fit(X)
+         >>> result = binner.transform(np.array([1.5, 5.5, 9.5]))
+         >>> print(result)
+         [0. 1. 2.]
+         >>>
+         >>> # Check bin intervals
+         >>> print(binner.n_bins_)
+         3
+         >>> assert len(binner.intervals_) == 3
+         >>>
+         >>> # Use fit_transform for one-step operation
+         >>> X2 = np.array([10, 20, 30, 40, 50])
+         >>> binner2 = QuantileBinner(n_bins=2)
+         >>> bins = binner2.fit_transform(X2)
+         >>> print(bins)
+         [0. 0. 1. 1. 1.]
+     """
+
+     def __init__(
+         self,
+         n_bins: int,
+         method: str = "linear",
+         subsample: int = 200000,
+         dtype: type = np.float64,
+         random_state: int = 789654,
+     ) -> None:
+
+         self._validate_params(n_bins, method, subsample, dtype, random_state)
+
+         self.n_bins = n_bins
+         self.method = method
+         self.subsample = subsample
+         self.dtype = dtype
+         self.random_state = random_state
+         self.n_bins_ = None
+         self.bin_edges_ = None
+         self.internal_edges_ = None
+         self.intervals_ = None
+
+     def _validate_params(
+         self, n_bins: int, method: str, subsample: int, dtype: type, random_state: int
+     ):
+         """
+         Validate parameters passed to the class initializer.
+
+         Args:
+             n_bins: Number of quantile-based bins. Must be int >= 2.
+             method: Quantile computation method for numpy.percentile.
+             subsample: Number of samples for computing quantiles. Must be int >= 1.
+             dtype: Data type for bin indices. Must be a valid numpy dtype.
+             random_state: Random seed for subset generation. Must be int >= 0.
+
+         Raises:
+             ValueError: If n_bins < 2, method is invalid, subsample < 1,
+                 random_state < 0, or dtype is not a valid type.
+
+         Examples:
+             >>> import numpy as np
+             >>> from spotforecast2.preprocessing import QuantileBinner
+             >>>
+             >>> # Valid parameters work fine
+             >>> binner = QuantileBinner(n_bins=5, method='linear')
+             >>> assert binner.n_bins == 5
+             >>>
+             >>> # Invalid n_bins raises ValueError
+             >>> try:
+             ...     binner = QuantileBinner(n_bins=1)
+             ... except ValueError as e:
+             ...     assert 'greater than 1' in str(e)
+             >>>
+             >>> # Invalid method raises ValueError
+             >>> try:
+             ...     binner = QuantileBinner(n_bins=3, method='invalid')
+             ... except ValueError as e:
+             ...     assert 'must be one of' in str(e)
+         """
+
+         if not isinstance(n_bins, int) or n_bins < 2:
+             raise ValueError(f"`n_bins` must be an int greater than 1. Got {n_bins}.")
+
+         # Names accepted by numpy.percentile's `method` argument.
+         valid_methods = [
+             "inverted_cdf",
+             "averaged_inverted_cdf",
+             "closest_observation",
+             "interpolated_inverted_cdf",
+             "hazen",
+             "weibull",
+             "linear",
+             "median_unbiased",
+             "normal_unbiased",
+         ]
+         if method not in valid_methods:
+             raise ValueError(f"`method` must be one of {valid_methods}. Got {method}.")
+         if not isinstance(subsample, int) or subsample < 1:
+             raise ValueError(
+                 f"`subsample` must be an integer greater than or equal to 1. "
+                 f"Got {subsample}."
+             )
+         if not isinstance(random_state, int) or random_state < 0:
+             raise ValueError(
+                 f"`random_state` must be an integer greater than or equal to 0. "
+                 f"Got {random_state}."
+             )
+         if not isinstance(dtype, type):
+             raise ValueError(f"`dtype` must be a valid numpy dtype. Got {dtype}.")
+
+     def fit(self, X: np.ndarray, y: object = None) -> object:
+         """
+         Learn bin edges based on quantiles from training data.
+
+         Computes quantile-based bin edges using numpy.percentile. If the dataset
+         contains more samples than `subsample`, a random subset is used. Duplicate
+         edges (which can occur with repeated values) are removed automatically.
+
+         Args:
+             X: Training data (1D numpy array) for computing quantiles.
+             y: Ignored.
+
+         Returns:
+             Self for method chaining.
+
+         Raises:
+             ValueError: If input data X is empty.
+
+         Examples:
+             >>> import numpy as np
+             >>> from spotforecast2.preprocessing import QuantileBinner
+             >>>
+             >>> # Fit with basic data
+             >>> X = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
+             >>> binner = QuantileBinner(n_bins=3)
+             >>> _ = binner.fit(X)
+             >>> print(binner.n_bins_)
+             3
+             >>> print(len(binner.bin_edges_))
+             4
+             >>>
+             >>> # Fit with repeated values (may reduce number of bins)
+             >>> X_repeated = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3])
+             >>> binner2 = QuantileBinner(n_bins=5)
+             >>> _ = binner2.fit(X_repeated)
+             >>> # n_bins_ may be less than 5 due to duplicates
+             >>> assert binner2.n_bins_ <= 5
+         """
+         # Note: sklearn's TransformerMixin calls fit(X, y=None), so the
+         # signature accepts and ignores `y`.
+
+         if X.size == 0:
+             raise ValueError("Input data `X` cannot be empty.")
+         if len(X) > self.subsample:
+             # Draw a random subset (with replacement) to bound the cost of
+             # the percentile computation on large datasets.
+             rng = np.random.default_rng(self.random_state)
+             X = X[rng.integers(0, len(X), self.subsample)]
+
+         bin_edges = np.percentile(
+             a=X, q=np.linspace(0, 100, self.n_bins + 1), method=self.method
+         )
+
+         # Remove duplicate edges (can happen when data has many repeated values)
+         # to ensure bins are always numbered 0 to n_bins_-1
+         self.bin_edges_ = np.unique(bin_edges)
+
+         # Ensure at least 1 bin when all values are identical
+         if len(self.bin_edges_) == 1:
+             # Duplicate the single edge so downstream code sees one degenerate bin
+             self.bin_edges_ = np.array([self.bin_edges_.item(), self.bin_edges_.item()])
+
+         self.n_bins_ = len(self.bin_edges_) - 1
+
+         if self.n_bins_ != self.n_bins:
+             warnings.warn(
+                 f"The number of bins has been reduced from {self.n_bins} to "
+                 f"{self.n_bins_} due to duplicated edges caused by repeated "
+                 f"values in the data.",
+                 IgnoredArgumentWarning,
+             )
+
+         # Internal edges for optimized transform with searchsorted
+         self.internal_edges_ = self.bin_edges_[1:-1]
+         self.intervals_ = {
+             int(i): (float(self.bin_edges_[i]), float(self.bin_edges_[i + 1]))
+             for i in range(self.n_bins_)
+         }
+
+         return self
+
+     def transform(self, X: np.ndarray, y: object = None) -> np.ndarray:
+         """
+         Assign new data to learned bins.
+
+         Uses numpy.searchsorted for efficient bin assignment. Values are assigned
+         to bins following the convention: bins[i-1] <= x < bins[i]. Values outside
+         the fitted range are clipped to the first or last bin.
+
+         Args:
+             X: Data to assign to bins (1D numpy array).
+             y: Ignored.
+
+         Returns:
+             Bin indices as numpy array with dtype specified in __init__.
+
+         Raises:
+             NotFittedError: If fit() has not been called yet.
+
+         Examples:
+             >>> import numpy as np
+             >>> from spotforecast2.preprocessing import QuantileBinner
+             >>>
+             >>> # Fit and transform
+             >>> X_train = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
+             >>> binner = QuantileBinner(n_bins=3)
+             >>> _ = binner.fit(X_train)
+             >>>
+             >>> X_test = np.array([1.5, 5.5, 9.5])
+             >>> result = binner.transform(X_test)
+             >>> print(result)
+             [0. 1. 2.]
+             >>>
+             >>> # Values outside range are clipped
+             >>> X_extreme = np.array([0, 100])
+             >>> result_extreme = binner.transform(X_extreme)
+             >>> print(result_extreme)  # Both clipped to valid bin indices
+             [0. 2.]
+         """
+
+         if self.bin_edges_ is None:
+             raise NotFittedError(
+                 "The model has not been fitted yet. Call 'fit' with training data first."
+             )
+
+         # Because the outer edges are dropped, searchsorted maps values below
+         # the first edge to bin 0 and values above the last edge to the last bin.
+         bin_indices = np.searchsorted(self.internal_edges_, X, side="right").astype(
+             self.dtype
+         )
+
+         return bin_indices
+
+     def fit_transform(self, X, y=None, **fit_params):
+         """
+         Fit to data, then transform it.
+
+         Args:
+             X: Input samples (1D numpy array).
+             y: Ignored.
+             **fit_params: Additional fit parameters.
+
+         Returns:
+             Bin indices for X as a numpy array.
+         """
+         # fit_transform is normally provided by TransformerMixin; it is kept
+         # explicit here to mirror the original implementation.
+
+         self.fit(X, y)
+         return self.transform(X, y)
+
+     def get_params(self, deep=True):
+         """
+         Get parameters of the quantile binner.
+
+         Args:
+             deep: Ignored; accepted for scikit-learn API compatibility.
+
+         Returns:
+             Dictionary containing n_bins, method, subsample, dtype, and
+             random_state parameters.
+
+         Examples:
+             >>> import numpy as np
+             >>> from spotforecast2.preprocessing import QuantileBinner
+             >>>
+             >>> binner = QuantileBinner(n_bins=5, method='median_unbiased', subsample=1000)
+             >>> params = binner.get_params()
+             >>> print(params['n_bins'])
+             5
+             >>> print(params['method'])
+             median_unbiased
+             >>> print(params['subsample'])
+             1000
+         """
+
+         return {
+             "n_bins": self.n_bins,
+             "method": self.method,
+             "subsample": self.subsample,
+             "dtype": self.dtype,
+             "random_state": self.random_state,
+         }
+
+     def set_params(self, **params):
+         """
+         Set parameters of the QuantileBinner.
+
+         Args:
+             **params: Parameter names and values to set as keyword arguments.
+
+         Returns:
+             Self for method chaining.
+
+         Examples:
+             >>> import numpy as np
+             >>> from spotforecast2.preprocessing import QuantileBinner
+             >>>
+             >>> binner = QuantileBinner(n_bins=3)
+             >>> print(binner.n_bins)
+             3
+             >>> _ = binner.set_params(n_bins=5, method='weibull')
+             >>> print(binner.n_bins)
+             5
+             >>> print(binner.method)
+             weibull
+         """
+
+         for param, value in params.items():
+             setattr(self, param, value)
+         return self
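
The internal-edge trick in `transform` can be sanity-checked with plain numpy; a small sketch with made-up edges:

    import numpy as np

    # Same convention as QuantileBinner.transform: drop the two outer edges,
    # then searchsorted(side="right") sends out-of-range values to the end bins.
    bin_edges = np.array([1.0, 4.0, 7.0, 10.0])  # 3 bins
    internal_edges = bin_edges[1:-1]             # [4.0, 7.0]

    X = np.array([0.0, 1.5, 4.0, 9.9, 100.0])
    print(np.searchsorted(internal_edges, X, side="right"))  # [0 0 1 2 2]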
@@ -0,0 +1,123 @@
+ """
+ Common preprocessing functions and utilities.
+ """
+
+ import functools
+ from typing import Callable, Any
+ import numpy as np
+ from numba import njit
+
+
+ def _check_X_numpy_ndarray_1d(ensure_1d: bool = True):
+     """
+     Decorator to check if argument `X` is a 1D numpy ndarray.
+
+     Args:
+         ensure_1d: If True, ensure X is a 1D array. Default True.
+
+     Returns:
+         Decorator that wraps the target method with the check.
+     """
+
+     def decorator(func: Callable):
+         @functools.wraps(func)
+         def wrapper(self, *args, **kwargs):
+             # `self` is bound explicitly, so X is either args[0]
+             # (if passed positionally) or in kwargs.
+             X = kwargs.get("X")
+             if X is None and len(args) > 0:
+                 X = args[0]
+
+             if X is not None:
+                 if not isinstance(X, np.ndarray):
+                     raise TypeError(f"`X` must be a numpy ndarray. Got {type(X)}.")
+                 if ensure_1d and X.ndim != 1:
+                     raise ValueError(f"`X` must be a 1D numpy ndarray. Got {X.ndim}D.")
+
+             return func(self, *args, **kwargs)
+
+         return wrapper
+
+     return decorator
+
+
+ @njit(cache=True)
+ def _np_mean_jit(x: np.ndarray) -> float:
+     """
+     Numba optimized mean function (NaN-aware).
+     """
+     return np.nanmean(x)
+
+
+ @njit(cache=True)
+ def _np_std_jit(x: np.ndarray) -> float:
+     """
+     Numba optimized std function (NaN-aware, population std, ddof=0).
+     """
+     return np.nanstd(x)
+
+
+ @njit(cache=True)
+ def _np_min_jit(x: np.ndarray) -> float:
+     """
+     Numba optimized min function (NaN-aware).
+     """
+     return np.nanmin(x)
+
+
+ @njit(cache=True)
+ def _np_max_jit(x: np.ndarray) -> float:
+     """
+     Numba optimized max function (NaN-aware).
+     """
+     return np.nanmax(x)
+
+
+ @njit(cache=True)
+ def _np_sum_jit(x: np.ndarray) -> float:
+     """
+     Numba optimized sum function (NaN-aware).
+     """
+     return np.nansum(x)
+
+
+ @njit(cache=True)
+ def _np_median_jit(x: np.ndarray) -> float:
+     """
+     Numba optimized median function (NaN-aware).
+     """
+     return np.nanmedian(x)
+
+
+ def check_valid_quantile(quantile: float | list[float] | tuple[float, ...] | np.ndarray) -> None:
+     """
+     Check that every quantile is valid (0 <= quantile <= 1).
+     """
+     if isinstance(quantile, (float, int)):
+         if not (0 <= quantile <= 1):
+             raise ValueError(f"Quantile must be between 0 and 1. Got {quantile}.")
+     elif isinstance(quantile, (list, tuple, np.ndarray)):
+         for q in quantile:
+             if not (0 <= q <= 1):
+                 raise ValueError(f"Quantiles must be between 0 and 1. Got {q}.")
+     else:
+         raise TypeError(
+             f"Quantile must be a float, list, tuple or numpy array. Got {type(quantile)}."
+         )
+
+
+ def check_is_fitted(estimator: Any, attributes: list[str] | None = None) -> None:
+     """
+     Check if estimator is fitted by verifying that the given attributes exist.
+     """
+     if attributes is None:
+         attributes = []
+
+     for attr in attributes:
+         if not hasattr(estimator, attr):
+             raise ValueError(
+                 f"This {type(estimator).__name__} instance is not fitted yet. "
+                 f"Call 'fit' with appropriate arguments before using this estimator."
+             )
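
A quick sketch of how `_check_X_numpy_ndarray_1d` is meant to wrap an estimator method (the `Demo` class is hypothetical, for illustration only):

    import numpy as np
    from spotforecast2.preprocessing._common import _check_X_numpy_ndarray_1d

    class Demo:  # hypothetical host class
        @_check_X_numpy_ndarray_1d(ensure_1d=True)
        def fit(self, X, y=None):
            return self

    Demo().fit(np.array([1.0, 2.0]))   # passes the check
    # Demo().fit([1.0, 2.0])           # TypeError: `X` must be a numpy ndarray
    # Demo().fit(np.zeros((2, 2)))     # ValueError: `X` must be a 1D numpy ndarray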
@@ -0,0 +1,123 @@
+ import numpy as np
+ from sklearn.base import BaseEstimator, TransformerMixin
+ from sklearn.utils.validation import check_is_fitted
+ from ._common import _check_X_numpy_ndarray_1d
+
+
+ class TimeSeriesDifferentiator(BaseEstimator, TransformerMixin):
+     """
+     Transforms a time series into a differenced time series.
+
+     Args:
+         order (int, optional): Order of differentiation. Defaults to 1.
+         initial_values (list, numpy ndarray, optional): Values used for the
+             inverse transformation (reverting differentiation). If None, the
+             first `order` values of the training data `X` are stored during `fit`.
+
+     Attributes:
+         initial_values_ (list): Values stored for inverse transformation.
+         last_values_ (np.ndarray): Last `order` values of the original
+             (undifferenced) series, used to invert the next window.
+     """
+
+     def __init__(self, order: int = 1, initial_values: list | np.ndarray | None = None):
+         self.order = order
+         self.initial_values = initial_values
+
+     @_check_X_numpy_ndarray_1d(ensure_1d=True)
+     def fit(self, X: np.ndarray, y: object = None) -> object:
+         """
+         Store initial values if not provided.
+         """
+         if not isinstance(self.order, int) or self.order < 1:
+             raise ValueError("`order` must be a positive integer.")
+
+         if self.initial_values is None:
+             if len(X) < self.order:
+                 raise ValueError(
+                     f"The time series must have at least {self.order} values "
+                     f"to compute the differentiation of order {self.order}."
+                 )
+             self.initial_values_ = list(X[: self.order])
+         else:
+             if len(self.initial_values) != self.order:
+                 raise ValueError(
+                     f"The length of `initial_values` must be equal to the order "
+                     f"of differentiation ({self.order})."
+                 )
+             self.initial_values_ = list(self.initial_values)
+
+         self.last_values_ = X[-self.order :]
+
+         return self
+
+     @_check_X_numpy_ndarray_1d(ensure_1d=True)
+     def transform(self, X: np.ndarray, y: object = None) -> np.ndarray:
+         """
+         Compute the differences.
+         """
+         if not hasattr(self, "initial_values_") and self.initial_values is not None:
+             # Not fitted yet, but initial values were supplied: fit on the fly.
+             self.fit(X)
+         elif not hasattr(self, "initial_values_"):
+             check_is_fitted(self, ["initial_values_"])
+
+         X_diff = np.diff(X, n=self.order)
+         # Pad with NaNs to keep same length
+         X_diff = np.concatenate([np.full(self.order, np.nan), X_diff])
+
+         # Update last values seen (for next window inverse)
+         self.last_values_ = X[-self.order :]
+
+         return X_diff
+
+     def inverse_transform_next_window(self, X: np.ndarray) -> np.ndarray:
+         """
+         Inverse transform for the next window of predictions.
+         """
+         check_is_fitted(self, ["initial_values_", "last_values_"])
+
+         if self.order == 1:
+             # Integrate the predicted differences starting from the last
+             # observed value of the original series.
+             result = np.cumsum(X) + self.last_values_[-1]
+         else:
+             raise NotImplementedError(
+                 "inverse_transform_next_window is not implemented for order > 1"
+             )
+
+         return result
+
+     @_check_X_numpy_ndarray_1d(ensure_1d=True)
+     def inverse_transform(self, X: np.ndarray, y: object = None) -> np.ndarray:
+         """
+         Revert the differences.
+         """
+         check_is_fitted(self, ["initial_values_"])
+
+         # X contains the differenced series (potentially with NaNs at the start);
+         # drop the first `order` entries, which correspond to the padding.
+         X_clean = X[self.order :]
+
+         if len(X_clean) == 0:
+             # Only padding was passed; return the stored initial values
+             return np.array(self.initial_values_)
+
+         result = list(self.initial_values_)
+
+         if self.order == 1:
+             # Cumulatively add each difference onto the last restored value.
+             current_value = result[-1]
+             restored = []
+             for diff_val in X_clean:
+                 current_value += diff_val
+                 restored.append(current_value)
+             result.extend(restored)
+         else:
+             # Inverting order > 1 requires repeated cumulative sums with the
+             # correct initial value at each integration level; not ported yet.
+             raise NotImplementedError(
+                 "Inverse transform for order > 1 is currently not implemented in this port."
+             )
+
+         return np.array(result)
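
A round-trip sketch for the supported order-1 path (toy values; assumes the class is importable from `spotforecast2.preprocessing` as exported above):

    import numpy as np
    from spotforecast2.preprocessing import TimeSeriesDifferentiator

    y = np.array([10.0, 12.0, 15.0, 19.0])

    tsd = TimeSeriesDifferentiator(order=1)
    y_diff = tsd.fit(y).transform(y)        # [nan, 2., 3., 4.]
    y_back = tsd.inverse_transform(y_diff)  # [10., 12., 15., 19.]

    # Next-window inversion: integrate predicted differences starting
    # from the last observed value (19.0).
    preds = np.array([1.0, 1.0])
    print(tsd.inverse_transform_next_window(preds))  # [20. 21.]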