spotforecast2 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/PKG-INFO +1 -1
  2. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/pyproject.toml +1 -1
  3. spotforecast2-0.2.0/src/spotforecast2/processing/n2n_predict.py +444 -0
  4. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/processing/n2n_predict_with_covariates.py +28 -14
  5. spotforecast2-0.1.0/src/spotforecast2/processing/n2n_predict.py +0 -126
  6. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/README.md +0 -0
  7. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/__init__.py +0 -0
  8. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/data/__init__.py +0 -0
  9. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/data/data.py +0 -0
  10. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/data/fetch_data.py +0 -0
  11. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/exceptions.py +0 -0
  12. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/forecaster/__init__.py +0 -0
  13. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/forecaster/base.py +0 -0
  14. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/forecaster/metrics.py +0 -0
  15. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/forecaster/recursive/__init__.py +0 -0
  16. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/forecaster/recursive/_forecaster_equivalent_date.py +0 -0
  17. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/forecaster/recursive/_forecaster_recursive.py +0 -0
  18. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/forecaster/recursive/_warnings.py +0 -0
  19. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/forecaster/utils.py +0 -0
  20. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/model_selection/__init__.py +0 -0
  21. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/model_selection/bayesian_search.py +0 -0
  22. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/model_selection/grid_search.py +0 -0
  23. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/model_selection/random_search.py +0 -0
  24. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/model_selection/split_base.py +0 -0
  25. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/model_selection/split_one_step.py +0 -0
  26. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/model_selection/split_ts_cv.py +0 -0
  27. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/model_selection/utils_common.py +0 -0
  28. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/model_selection/utils_metrics.py +0 -0
  29. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/model_selection/validation.py +0 -0
  30. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/preprocessing/__init__.py +0 -0
  31. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/preprocessing/_binner.py +0 -0
  32. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/preprocessing/_common.py +0 -0
  33. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/preprocessing/_differentiator.py +0 -0
  34. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/preprocessing/_rolling.py +0 -0
  35. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/preprocessing/curate_data.py +0 -0
  36. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/preprocessing/imputation.py +0 -0
  37. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/preprocessing/outlier.py +0 -0
  38. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/preprocessing/split.py +0 -0
  39. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/processing/agg_predict.py +0 -0
  40. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/py.typed +0 -0
  41. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/utils/__init__.py +0 -0
  42. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/utils/convert_to_utc.py +0 -0
  43. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/utils/data_transform.py +0 -0
  44. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/utils/forecaster_config.py +0 -0
  45. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/utils/generate_holiday.py +0 -0
  46. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/utils/validation.py +0 -0
  47. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/weather/__init__.py +0 -0
  48. {spotforecast2-0.1.0 → spotforecast2-0.2.0}/src/spotforecast2/weather/weather_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: spotforecast2
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: Forecasting with spot
5
5
  Author: bartzbeielstein
6
6
  Author-email: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "spotforecast2"
3
- version = "0.1.0"
3
+ version = "0.2.0"
4
4
  description = "Forecasting with spot"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -0,0 +1,444 @@
1
+ """
2
+ End-to-end baseline forecasting using equivalent date method.
3
+
4
+ This module provides a complete forecasting pipeline using the ForecasterEquivalentDate
5
+ baseline model. It handles data preparation, outlier detection, imputation, model
6
+ training, and prediction in a single integrated function.
7
+
8
+ Model persistence follows scikit-learn conventions using joblib for efficient
9
+ serialization and deserialization of trained forecasters.
10
+
11
+ Examples:
12
+ Basic usage with default parameters:
13
+
14
+ >>> from spotforecast2.processing.n2n_predict import n2n_predict
15
+ >>> predictions, forecasters = n2n_predict(forecast_horizon=24, verbose=True)
16
+
17
+ Using cached models:
18
+
19
+ >>> # Load existing models if available, or train new ones
20
+ >>> predictions, forecasters = n2n_predict(
21
+ ... forecast_horizon=24,
22
+ ... force_train=False,
23
+ ... model_dir="./models",
24
+ ... verbose=True
25
+ ... )
26
+
27
+ Force retraining and update cache:
28
+
29
+ >>> predictions, forecasters = n2n_predict(
30
+ ... forecast_horizon=24,
31
+ ... force_train=True,
32
+ ... model_dir="./models",
33
+ ... verbose=True
34
+ ... )
35
+ """
36
+
37
+ from pathlib import Path
38
+ from typing import Dict, List, Optional, Tuple, Union
39
+
40
+ import pandas as pd
41
+ from spotforecast2.forecaster.recursive import ForecasterEquivalentDate
42
+ from spotforecast2.data.fetch_data import fetch_data
43
+ from spotforecast2.preprocessing.curate_data import basic_ts_checks
44
+ from spotforecast2.preprocessing.curate_data import agg_and_resample_data
45
+ from spotforecast2.preprocessing.outlier import mark_outliers
46
+ from spotforecast2.preprocessing.split import split_rel_train_val_test
47
+ from spotforecast2.forecaster.utils import predict_multivariate
48
+ from spotforecast2.preprocessing.curate_data import get_start_end
49
+
50
+ try:
51
+ from joblib import dump, load
52
+ except ImportError:
53
+ raise ImportError("joblib is required. Install with: pip install joblib")
54
+
55
+ try:
56
+ from tqdm.auto import tqdm
57
+ except ImportError: # pragma: no cover - fallback when tqdm is not installed
58
+ tqdm = None
59
+
60
+
61
+ # ============================================================================
62
+ # Model Persistence Functions
63
+ # ============================================================================
64
+
65
+
66
+ def _ensure_model_dir(model_dir: Union[str, Path]) -> Path:
67
+ """Ensure model directory exists.
68
+
69
+ Args:
70
+ model_dir: Directory path for model storage.
71
+
72
+ Returns:
73
+ Path: Validated Path object.
74
+
75
+ Raises:
76
+ OSError: If directory cannot be created.
77
+ """
78
+ model_path = Path(model_dir)
79
+ model_path.mkdir(parents=True, exist_ok=True)
80
+ return model_path
81
+
82
+
83
+ def _get_model_filepath(model_dir: Path, target: str) -> Path:
84
+ """Get filepath for a single model.
85
+
86
+ Args:
87
+ model_dir: Directory containing models.
88
+ target: Target variable name.
89
+
90
+ Returns:
91
+ Path: Full filepath for the model.
92
+
93
+ Examples:
94
+ >>> path = _get_model_filepath(Path("./models"), "power")
95
+ >>> str(path)
96
+ './models/forecaster_power.joblib'
97
+ """
98
+ return model_dir / f"forecaster_{target}.joblib"
99
+
100
+
101
def _save_forecasters(
    forecasters: Dict[str, object],
    model_dir: Union[str, Path],
    verbose: bool = False,
) -> Dict[str, Path]:
    """Save trained forecasters to disk using joblib.

    Follows scikit-learn persistence conventions using joblib for efficient
    serialization of sklearn-compatible estimators.

    Args:
        forecasters: Mapping of target names to trained ForecasterEquivalentDate objects.
        model_dir: Directory to save models. Created if it doesn't exist.
        verbose: Print progress messages. Default: False.

    Returns:
        Dict[str, Path]: Mapping of target names to saved model filepaths.

    Raises:
        OSError: If a model cannot be written to disk. The underlying
            error is chained as ``__cause__`` for easier debugging.
        TypeError: If forecasters contain non-serializable objects.

    Examples:
        >>> forecasters = {"power": forecaster_obj}
        >>> paths = _save_forecasters(forecasters, "./models", verbose=True)
        >>> print(paths["power"])
        models/forecaster_power.joblib
    """
    model_path = _ensure_model_dir(model_dir)
    saved_paths: Dict[str, Path] = {}

    for target, forecaster in forecasters.items():
        filepath = _get_model_filepath(model_path, target)
        try:
            # compress=3 trades a little CPU for noticeably smaller files.
            dump(forecaster, filepath, compress=3)
        except Exception as e:
            # Chain the original exception (PEP 3134) instead of discarding it.
            raise OSError(f"Failed to save model for {target}: {e}") from e
        # Record / report outside the try so only dump() failures map to OSError.
        saved_paths[target] = filepath
        if verbose:
            print(f" ✓ Saved forecaster for {target} to {filepath}")

    return saved_paths
143
+
144
+
145
def _load_forecasters(
    target_columns: List[str],
    model_dir: Union[str, Path],
    verbose: bool = False,
) -> Tuple[Dict[str, object], List[str]]:
    """Load cached forecasters from disk, reporting which targets are missing.

    Attempts to load a saved model for every requested target. Targets
    without a usable model file are returned separately so the caller can
    retrain just those.

    Args:
        target_columns: List of target variable names to load.
        model_dir: Directory containing saved models.
        verbose: Print progress messages. Default: False.

    Returns:
        Tuple[Dict[str, object], List[str]]:
            - Successfully loaded ForecasterEquivalentDate objects keyed by target.
            - Target names for which no model could be loaded.

    Examples:
        >>> forecasters, missing = _load_forecasters(
        ...     ["power", "energy"],
        ...     "./models",
        ...     verbose=True
        ... )
        >>> print(missing)
        ['energy']
    """
    base = Path(model_dir)
    loaded: Dict[str, object] = {}
    missing: List[str] = []

    for target in target_columns:
        path = _get_model_filepath(base, target)
        if not path.exists():
            missing.append(target)
            continue
        try:
            loaded[target] = load(path)
        except Exception as e:
            # An unreadable/corrupt model file is treated like a missing
            # one, so the caller simply retrains this target.
            if verbose:
                print(f" ✗ Failed to load {target}: {e}")
            missing.append(target)
        else:
            if verbose:
                print(f" ✓ Loaded forecaster for {target} from {path}")

    return loaded, missing
193
+
194
+
195
+ def _model_directory_exists(model_dir: Union[str, Path]) -> bool:
196
+ """Check if model directory exists.
197
+
198
+ Args:
199
+ model_dir: Directory path to check.
200
+
201
+ Returns:
202
+ bool: True if directory exists, False otherwise.
203
+ """
204
+ return Path(model_dir).exists()
205
+
206
+
207
+ # ============================================================================
208
+ # Main Function
209
+ # ============================================================================
210
+
211
+
212
def n2n_predict(
    data: Optional[pd.DataFrame] = None,
    columns: Optional[List[str]] = None,
    forecast_horizon: int = 24,
    contamination: float = 0.01,
    window_size: int = 72,
    force_train: bool = False,
    model_dir: Union[str, Path] = "./models_baseline",
    verbose: bool = True,
    show_progress: bool = True,
) -> Tuple[pd.DataFrame, Dict]:
    """End-to-end baseline forecasting using equivalent date method.

    This function implements a complete forecasting pipeline that:
    1. Loads and validates target data
    2. Detects and removes outliers
    3. Imputes missing values
    4. Splits into train/validation/test sets
    5. Trains or loads equivalent date forecasters
    6. Generates multi-step ahead predictions

    Models are persisted to disk following scikit-learn conventions using joblib.
    Existing models are reused for prediction unless force_train=True.

    Args:
        data: Optional DataFrame with target time series data. If None, fetches data automatically.
            Default: None.
        columns: List of target columns to forecast. If None, uses all available columns.
            Default: None.
        forecast_horizon: Number of time steps to forecast ahead. Default: 24.
        contamination: Contamination parameter for outlier detection. Default: 0.01.
        window_size: Accepted for API consistency with the covariate pipeline,
            but currently unused by this baseline pipeline. Default: 72.
        force_train: Force retraining of all models, ignoring cached models.
            Default: False.
        model_dir: Directory for saving/loading trained models.
            Default: "./models_baseline".
        verbose: Print progress messages. Default: True.
        show_progress: Show progress bar during training and prediction. Default: True.

    Returns:
        Tuple containing:
        - predictions: DataFrame with forecast values for each target variable.
        - forecasters: Dictionary of trained ForecasterEquivalentDate objects keyed by target.

    Raises:
        ValueError: If data validation fails or required data cannot be retrieved.
        ImportError: If required dependencies are not installed.
        OSError: If models cannot be saved to disk.

    Examples:
        Basic usage with automatic model caching:

        >>> predictions, forecasters = n2n_predict(
        ...     forecast_horizon=24,
        ...     verbose=True
        ... )
        >>> print(predictions.shape)
        (24, 11)

        Load cached models (if available):

        >>> predictions, forecasters = n2n_predict(
        ...     forecast_horizon=24,
        ...     force_train=False,
        ...     model_dir="./saved_models",
        ...     verbose=True
        ... )

        Force retraining and update cache:

        >>> predictions, forecasters = n2n_predict(
        ...     forecast_horizon=24,
        ...     force_train=True,
        ...     model_dir="./saved_models",
        ...     verbose=True
        ... )

        With specific target columns:

        >>> predictions, forecasters = n2n_predict(
        ...     columns=["power", "energy"],
        ...     forecast_horizon=48,
        ...     force_train=False,
        ...     verbose=True
        ... )

    Notes:
        - Trained models are saved to disk using joblib for fast reuse.
        - When force_train=False, existing models are loaded and prediction
          proceeds without retraining. This significantly speeds up prediction
          for repeated calls with the same configuration.
        - The model_dir directory is created automatically if it doesn't exist.

    Performance Notes:
        - First run: Full training (~2-5 minutes depending on data size)
        - Subsequent runs (force_train=False): Model loading only (~1-2 seconds)
        - Force retrain (force_train=True): Full training again (~2-5 minutes)
    """
    # TARGET stays None when no column subset was requested; fetch_data
    # interprets None as "all available columns".
    if columns is not None:
        TARGET = columns
    else:
        TARGET = None

    if verbose:
        print("--- Starting n2n_predict ---")
        print("Fetching data...")

    # Fetch data. A caller-supplied frame is used as-is (optionally
    # restricted to the requested columns); otherwise data is fetched.
    if data is not None:
        if TARGET is not None:
            data = data[TARGET]
    else:
        data = fetch_data(columns=TARGET)

    # NOTE(review): only data is used downstream; START/END/COV_START/COV_END
    # appear to be computed for their side-effect logging — confirm.
    START, END, COV_START, COV_END = get_start_end(
        data=data,
        forecast_horizon=forecast_horizon,
        verbose=verbose,
    )

    basic_ts_checks(data, verbose=verbose)

    data = agg_and_resample_data(data, verbose=verbose)

    # --- Outlier Handling ---
    if verbose:
        print("Handling outliers...")

    # data_old = data.copy() # kept in notebook, maybe useful for debugging but not used logic-wise here
    # Fixed random_state keeps outlier detection reproducible across runs.
    data, outliers = mark_outliers(
        data, contamination=contamination, random_state=1234, verbose=verbose
    )

    # --- Missing Data (Imputation) ---
    if verbose:
        print("Imputing missing data...")

    missing_indices = data.index[data.isnull().any(axis=1)]
    if verbose:
        n_missing = len(missing_indices)
        pct_missing = (n_missing / len(data)) * 100
        print(f"Number of rows with missing values: {n_missing}")
        print(f"Percentage of rows with missing values: {pct_missing:.2f}%")

    # Forward-fill first, then back-fill to cover any leading NaNs.
    data = data.ffill()
    data = data.bfill()

    # --- Train, Val, Test Split ---
    if verbose:
        print("Splitting data...")
    data_train, data_val, data_test = split_rel_train_val_test(
        data, perc_train=0.8, perc_val=0.2, verbose=verbose
    )

    # --- Model Fit ---
    if verbose:
        print("Fitting models...")

    # Models are fit on train+validation; the test split is held out.
    end_validation = pd.concat([data_train, data_val]).index[-1]

    baseline_forecasters = {}
    targets_to_train = list(data.columns)

    # Attempt to load cached models if force_train=False
    if not force_train and _model_directory_exists(model_dir):
        if verbose:
            print(" Attempting to load cached models...")
        cached_forecasters, missing_targets = _load_forecasters(
            target_columns=list(data.columns),
            model_dir=model_dir,
            verbose=verbose,
        )
        baseline_forecasters.update(cached_forecasters)
        # Only the targets without a usable cached model get retrained.
        targets_to_train = missing_targets

        if len(cached_forecasters) == len(data.columns):
            if verbose:
                print(f" ✓ All {len(data.columns)} forecasters loaded from cache")
        elif len(cached_forecasters) > 0:
            if verbose:
                print(
                    f" ✓ Loaded {len(cached_forecasters)} forecasters, "
                    f"will train {len(targets_to_train)} new ones"
                )

    # Train missing or forced models
    if len(targets_to_train) > 0:
        # NOTE(review): with force_train=True the cache-loading branch above
        # is skipped, so baseline_forecasters is empty here and this reset
        # looks unreachable — confirm whether it guards a future code path.
        if force_train and len(baseline_forecasters) > 0:
            if verbose:
                print(f" Force retraining all {len(data.columns)} forecasters...")
            targets_to_train = list(data.columns)
            baseline_forecasters.clear()

        target_iter = targets_to_train
        if show_progress and tqdm is not None:
            target_iter = tqdm(
                targets_to_train,
                desc="Training forecasters",
                unit="model",
            )

        # One independent single-series forecaster per target column.
        for target in target_iter:
            forecaster = ForecasterEquivalentDate(
                offset=pd.DateOffset(days=1), n_offsets=1
            )

            forecaster.fit(y=data.loc[:end_validation, target])

            baseline_forecasters[target] = forecaster

        # Save newly trained models to disk
        if verbose:
            print(f" Saving {len(targets_to_train)} trained forecasters to disk...")
        _save_forecasters(
            forecasters={t: baseline_forecasters[t] for t in targets_to_train},
            model_dir=model_dir,
            verbose=verbose,
        )

    if verbose:
        print(f" ✓ Total forecasters available: {len(baseline_forecasters)}")

    # --- Predict ---
    if verbose:
        print("Generating predictions...")

    predictions = predict_multivariate(
        baseline_forecasters,
        steps_ahead=forecast_horizon,
        show_progress=show_progress,
    )

    return predictions, baseline_forecasters
@@ -727,6 +727,7 @@ def _model_directory_exists(model_dir: Union[str, Path]) -> bool:
727
727
 
728
728
 
729
729
  def n2n_predict_with_covariates(
730
+ data: Optional[pd.DataFrame] = None,
730
731
  forecast_horizon: int = 24,
731
732
  contamination: float = 0.01,
732
733
  window_size: int = 72,
@@ -744,7 +745,7 @@ def n2n_predict_with_covariates(
744
745
  force_train: bool = False,
745
746
  model_dir: Union[str, Path] = "./forecaster_models",
746
747
  verbose: bool = True,
747
- show_progress: bool = True,
748
+ show_progress: bool = False,
748
749
  ) -> Tuple[pd.DataFrame, Dict, Dict]:
749
750
  """End-to-end recursive forecasting with exogenous covariates.
750
751
 
@@ -763,6 +764,8 @@ def n2n_predict_with_covariates(
763
764
  Existing models are reused for prediction unless force_train=True.
764
765
 
765
766
  Args:
767
+ data: Optional DataFrame with target time series data. If None, fetches data automatically.
768
+ Default: None.
766
769
  forecast_horizon: Number of time steps to forecast ahead. Default: 24.
767
770
  contamination: Contamination parameter for outlier detection. Default: 0.01.
768
771
  window_size: Rolling window size for gap detection. Default: 72.
@@ -781,9 +784,9 @@ def n2n_predict_with_covariates(
781
784
  force_train: Force retraining of all models, ignoring cached models.
782
785
  Default: False.
783
786
  model_dir: Directory for saving/loading trained models.
784
- Default: "./forecaster_models".
787
+ Default: "./forecaster_models".
785
788
  verbose: Print progress messages. Default: True.
786
- show_progress: Show progress bar during training. Default: True.
789
+ show_progress: Show progress bar during training. Default: False.
787
790
 
788
791
  Returns:
789
792
  Tuple containing:
@@ -865,7 +868,8 @@ def n2n_predict_with_covariates(
865
868
  if verbose:
866
869
  print("\n[1/9] Loading and preparing target data...")
867
870
 
868
- data = fetch_data()
871
+ if data is None:
872
+ data = fetch_data()
869
873
  target_columns = data.columns.tolist()
870
874
 
871
875
  if verbose:
@@ -913,6 +917,10 @@ def n2n_predict_with_covariates(
913
917
  """Return sample weights for given index."""
914
918
  return custom_weights(index, weights_series)
915
919
 
920
+ # Note: weight_func is a local function and cannot be pickled.
921
+ # Model persistence is disabled when using weight_func.
922
+ use_model_persistence = False
923
+
916
924
  # ========================================================================
917
925
  # 4. EXOGENOUS FEATURES ENGINEERING
918
926
  # ========================================================================
@@ -1070,11 +1078,11 @@ def n2n_predict_with_covariates(
1070
1078
  window_features = RollingFeatures(stats=["mean"], window_sizes=window_size)
1071
1079
  end_validation = pd.concat([data_train, data_val]).index[-1]
1072
1080
 
1073
- # Attempt to load cached models if force_train=False
1081
+ # Attempt to load cached models if force_train=False and persistence is enabled
1074
1082
  recursive_forecasters = {}
1075
1083
  targets_to_train = target_columns
1076
1084
 
1077
- if not force_train and _model_directory_exists(model_dir):
1085
+ if use_model_persistence and not force_train and _model_directory_exists(model_dir):
1078
1086
  if verbose:
1079
1087
  print(" Attempting to load cached models...")
1080
1088
  cached_forecasters, missing_targets = _load_forecasters(
@@ -1132,14 +1140,20 @@ def n2n_predict_with_covariates(
1132
1140
  if verbose:
1133
1141
  print(f" ✓ Forecaster trained for {target}")
1134
1142
 
1135
- # Save newly trained models to disk
1136
- if verbose:
1137
- print(f" Saving {len(targets_to_train)} trained forecasters to disk...")
1138
- _save_forecasters(
1139
- forecasters={t: recursive_forecasters[t] for t in targets_to_train},
1140
- model_dir=model_dir,
1141
- verbose=verbose,
1142
- )
1143
+ # Save newly trained models to disk (only if persistence is enabled)
1144
+ if use_model_persistence:
1145
+ if verbose:
1146
+ print(
1147
+ f" Saving {len(targets_to_train)} trained forecasters to disk..."
1148
+ )
1149
+ _save_forecasters(
1150
+ forecasters={t: recursive_forecasters[t] for t in targets_to_train},
1151
+ model_dir=model_dir,
1152
+ verbose=verbose,
1153
+ )
1154
+ else:
1155
+ if verbose:
1156
+ print(" ⚠ Model persistence disabled (weight_func cannot be pickled)")
1143
1157
 
1144
1158
  if verbose:
1145
1159
  print(f" ✓ Total forecasters available: {len(recursive_forecasters)}")
@@ -1,126 +0,0 @@
1
- import pandas as pd
2
- from typing import List, Optional
3
- from spotforecast2.forecaster.recursive import ForecasterEquivalentDate
4
- from spotforecast2.data.fetch_data import fetch_data
5
- from spotforecast2.preprocessing.curate_data import basic_ts_checks
6
- from spotforecast2.preprocessing.curate_data import agg_and_resample_data
7
- from spotforecast2.preprocessing.outlier import mark_outliers
8
-
9
- from spotforecast2.preprocessing.split import split_rel_train_val_test
10
- from spotforecast2.forecaster.utils import predict_multivariate
11
- from spotforecast2.preprocessing.curate_data import get_start_end
12
-
13
- try:
14
- from tqdm.auto import tqdm
15
- except ImportError: # pragma: no cover - fallback when tqdm is not installed
16
- tqdm = None
17
-
18
-
19
- def n2n_predict(
20
- columns: Optional[List[str]] = None,
21
- forecast_horizon: int = 24,
22
- contamination: float = 0.01,
23
- window_size: int = 72,
24
- verbose: bool = True,
25
- show_progress: bool = True,
26
- ) -> pd.DataFrame:
27
- """
28
- End-to-end prediction function replicating the workflow from 01_base_predictor combined with fetch_data.
29
-
30
- Args:
31
- columns: List of target columns to forecast. If None, uses a default set (defined internally or from data).
32
- Note: fetch_data supports None to return all columns.
33
- forecast_horizon: Number of steps to forecast.
34
- contamination: Contamination factor for outlier detection.
35
- window_size: Window size for weighting (not fully utilized in main flow but kept for consistency).
36
- verbose: Whether to print progress logs.
37
- show_progress: Show progress bar during training and prediction.
38
-
39
- Returns:
40
- pd.DataFrame: The multi-output predictions.
41
- """
42
- if columns is not None:
43
- TARGET = columns
44
- else:
45
- TARGET = None
46
-
47
- if verbose:
48
- print("--- Starting n2n_predict ---")
49
- print("Fetching data...")
50
-
51
- # Fetch data
52
- data = fetch_data(columns=TARGET)
53
-
54
- START, END, COV_START, COV_END = get_start_end(
55
- data=data,
56
- forecast_horizon=forecast_horizon,
57
- verbose=verbose,
58
- )
59
-
60
- basic_ts_checks(data, verbose=verbose)
61
-
62
- data = agg_and_resample_data(data, verbose=verbose)
63
-
64
- # --- Outlier Handling ---
65
- if verbose:
66
- print("Handling outliers...")
67
-
68
- # data_old = data.copy() # kept in notebook, maybe useful for debugging but not used logic-wise here
69
- data, outliers = mark_outliers(
70
- data, contamination=contamination, random_state=1234, verbose=verbose
71
- )
72
-
73
- # --- Missing Data (Imputation) ---
74
- if verbose:
75
- print("Imputing missing data...")
76
-
77
- missing_indices = data.index[data.isnull().any(axis=1)]
78
- if verbose:
79
- n_missing = len(missing_indices)
80
- pct_missing = (n_missing / len(data)) * 100
81
- print(f"Number of rows with missing values: {n_missing}")
82
- print(f"Percentage of rows with missing values: {pct_missing:.2f}%")
83
-
84
- data = data.ffill()
85
- data = data.bfill()
86
-
87
- # --- Train, Val, Test Split ---
88
- if verbose:
89
- print("Splitting data...")
90
- data_train, data_val, data_test = split_rel_train_val_test(
91
- data, perc_train=0.8, perc_val=0.2, verbose=verbose
92
- )
93
-
94
- # --- Model Fit ---
95
- if verbose:
96
- print("Fitting models...")
97
-
98
- end_validation = pd.concat([data_train, data_val]).index[-1]
99
-
100
- baseline_forecasters = {}
101
-
102
- target_iter = data.columns
103
- if show_progress and tqdm is not None:
104
- target_iter = tqdm(data.columns, desc="Training forecasters", unit="model")
105
-
106
- for target in target_iter:
107
- forecaster = ForecasterEquivalentDate(offset=pd.DateOffset(days=1), n_offsets=1)
108
-
109
- forecaster.fit(y=data.loc[:end_validation, target])
110
-
111
- baseline_forecasters[target] = forecaster
112
-
113
- if verbose:
114
- print("✓ Multi-output baseline system trained")
115
-
116
- # --- Predict ---
117
- if verbose:
118
- print("Generating predictions...")
119
-
120
- predictions = predict_multivariate(
121
- baseline_forecasters,
122
- steps_ahead=forecast_horizon,
123
- show_progress=show_progress,
124
- )
125
-
126
- return predictions
File without changes