spotforecast2 0.0.1.tar.gz → 0.0.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/PKG-INFO +1 -1
  2. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/pyproject.toml +1 -1
  3. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/data/fetch_data.py +6 -5
  4. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/utils.py +17 -1
  5. spotforecast2-0.0.3/src/spotforecast2/processing/agg_predict.py +61 -0
  6. spotforecast2-0.0.3/src/spotforecast2/processing/n2n_predict.py +126 -0
  7. spotforecast2-0.0.3/src/spotforecast2/processing/n2n_predict_with_covariates.py +937 -0
  8. spotforecast2-0.0.3/src/spotforecast2/weather/__init__.py +5 -0
  9. spotforecast2-0.0.1/src/spotforecast2/weather/__init__.py +0 -0
  10. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/README.md +0 -0
  11. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/.DS_Store +0 -0
  12. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/__init__.py +0 -0
  13. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/data/__init__.py +0 -0
  14. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/data/data.py +0 -0
  15. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/exceptions.py +0 -0
  16. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/.DS_Store +0 -0
  17. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/__init__.py +0 -0
  18. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/base.py +0 -0
  19. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/metrics.py +0 -0
  20. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/recursive/__init__.py +0 -0
  21. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/recursive/_forecaster_equivalent_date.py +0 -0
  22. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/recursive/_forecaster_recursive.py +0 -0
  23. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/recursive/_warnings.py +0 -0
  24. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/__init__.py +0 -0
  25. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/bayesian_search.py +0 -0
  26. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/grid_search.py +0 -0
  27. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/random_search.py +0 -0
  28. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/split_base.py +0 -0
  29. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/split_one_step.py +0 -0
  30. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/split_ts_cv.py +0 -0
  31. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/utils_common.py +0 -0
  32. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/utils_metrics.py +0 -0
  33. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/validation.py +0 -0
  34. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/__init__.py +0 -0
  35. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/_binner.py +0 -0
  36. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/_common.py +0 -0
  37. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/_differentiator.py +0 -0
  38. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/_rolling.py +0 -0
  39. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/curate_data.py +0 -0
  40. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/imputation.py +0 -0
  41. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/outlier.py +0 -0
  42. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/split.py +0 -0
  43. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/py.typed +0 -0
  44. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/utils/__init__.py +0 -0
  45. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/utils/convert_to_utc.py +0 -0
  46. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/utils/data_transform.py +0 -0
  47. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/utils/forecaster_config.py +0 -0
  48. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/utils/generate_holiday.py +0 -0
  49. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/utils/validation.py +0 -0
  50. {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/weather/weather_client.py +0 -0
{spotforecast2-0.0.1 → spotforecast2-0.0.3}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: spotforecast2
-Version: 0.0.1
+Version: 0.0.3
 Summary: Forecasting with spot
 Author: bartzbeielstein
 Author-email: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>
{spotforecast2-0.0.1 → spotforecast2-0.0.3}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "spotforecast2"
-version = "0.0.1"
+version = "0.0.3"
 description = "Forecasting with spot"
 readme = "README.md"
 authors = [
{spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/data/fetch_data.py
@@ -44,7 +44,7 @@ def get_data_home(data_home: Optional[Union[str, Path]] = None) -> Path:
 
 
 def fetch_data(
-    filename: str = "integrated_raw_data.csv",
+    filename: str = "data_in.csv",
     columns: Optional[list] = None,
     index_col: int = 0,
     parse_dates: bool = True,
@@ -56,8 +56,9 @@ def fetch_data(
     Args:
         filename (str):
             Filename of the CSV file containing the dataset. It must be located in the data home directory, which can be get or set using `get_data_home()`.
-        columns (list):
-            List of columns to be included in the dataset. Must be specified.
+        columns (list, optional):
+            List of columns to be included in the dataset. If None, all columns are included.
+            If an empty list is provided, a ValueError is raised.
         index_col (int):
             Column index to be used as the index.
         parse_dates (bool):
@@ -71,7 +72,7 @@ def fetch_data(
         pd.DataFrame: The integrated raw dataset.
 
     Raises:
-        ValueError: If columns is None or empty.
+        ValueError: If columns is an empty list.
 
     Examples:
         >>> from spotforecast2.data.fetch_data import fetch_data
@@ -79,7 +80,7 @@ def fetch_data(
         >>> data.head()
            Header1  Header2  Header3
     """
-    if columns is None or len(columns) == 0:
+    if columns is not None and len(columns) == 0:
        raise ValueError("columns must be specified and cannot be empty.")
 
    csv_path = get_data_home() / filename
{spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/utils.py
@@ -20,6 +20,11 @@ from spotforecast2.utils import (
 )
 from spotforecast2.exceptions import set_skforecast_warnings, UnknownLevelWarning
 
+try:
+    from tqdm.auto import tqdm
+except ImportError:  # pragma: no cover - fallback when tqdm is not installed
+    tqdm = None
+
 
 def check_preprocess_series(series):
     pass
@@ -785,6 +790,7 @@ def predict_multivariate(
     forecasters: dict[str, Any],
     steps_ahead: int,
     exog: pd.DataFrame | None = None,
+    show_progress: bool = False,
 ) -> pd.DataFrame:
     """
     Generate multi-output predictions using multiple baseline forecasters.
@@ -796,6 +802,8 @@ def predict_multivariate(
         steps_ahead (int): Number of steps to forecast.
         exog (pd.DataFrame, optional): Exogenous variables for prediction.
             If provided, will be passed to each forecaster's predict method.
+        show_progress (bool, optional): Show progress bar while predicting
+            per target forecaster. Default: False.
 
     Returns:
         pd.DataFrame: DataFrame with predictions for all targets.
@@ -824,7 +832,15 @@ def predict_multivariate(
 
     predictions = {}
 
-    for target, forecaster in forecasters.items():
+    target_iter = forecasters.items()
+    if show_progress and tqdm is not None:
+        target_iter = tqdm(
+            forecasters.items(),
+            desc="Predicting targets",
+            unit="model",
+        )
+
+    for target, forecaster in target_iter:
         # Generate predictions for this target
         if exog is not None:
             pred = forecaster.predict(steps=steps_ahead, exog=exog)
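A short sketch of the new `show_progress` flag on `predict_multivariate`, assuming `fitted_forecasters` is a dict mapping target names to already fitted forecasters (as built in the new `n2n_predict` module below); the progress bar only appears when tqdm is installed:

    from spotforecast2.forecaster.utils import predict_multivariate

    predictions = predict_multivariate(
        fitted_forecasters,   # assumed: {target_name: fitted forecaster}
        steps_ahead=24,
        show_progress=True,   # tqdm bar per target model, silently skipped without tqdm
    )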
spotforecast2-0.0.3/src/spotforecast2/processing/agg_predict.py
@@ -0,0 +1,61 @@
+from typing import Dict, Optional, Union, List
+import pandas as pd
+import numpy as np
+
+
+def agg_predict(
+    predictions: pd.DataFrame,
+    weights: Optional[Union[Dict[str, float], List[float], np.ndarray]] = None,
+) -> pd.Series:
+    """Aggregates multiple prediction columns into a single combined prediction series.
+
+    The combination is a weighted sum of the prediction columns. If no weights are provided,
+    all columns are summed with equal weight (1.0 each).
+
+    Args:
+        predictions (pd.DataFrame): DataFrame containing the prediction columns.
+        weights (Optional[Union[Dict[str, float], List[float], np.ndarray]]):
+            Dictionary mapping column names to their weights, or a list/array of weights
+            corresponding to the order of columns in `predictions`.
+            If None, defaults to summing all columns (weight=1.0 for each column).
+
+    Returns:
+        pd.Series: A Series containing the aggregated values.
+
+    Raises:
+        ValueError: If a column specified in weights (or default weights) is missing from predictions.
+        ValueError: If weights is a list/array and its length does not match the number of columns in predictions.
+
+    Examples:
+        >>> df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
+        >>> agg_predict(df, weights={"A": 1.0, "B": -1.0})
+        0   -2.0
+        1   -2.0
+        dtype: float64
+        >>> agg_predict(df, weights=[0.5, 2.0])
+        0    6.5
+        1    9.0
+        dtype: float64
+    """
+    if weights is None:
+        # Default to summing all columns
+        weights = {col: 1.0 for col in predictions.columns}
+
+    if isinstance(weights, (list, np.ndarray)):
+        if len(weights) != len(predictions.columns):
+            raise ValueError(
+                f"Length of weights ({len(weights)}) does not match number of columns in predictions ({len(predictions.columns)})"
+            )
+        # Convert to dictionary using column order
+        weights = dict(zip(predictions.columns, weights))
+
+    combined = pd.Series(0.0, index=predictions.index)
+
+    missing_cols = [col for col in weights.keys() if col not in predictions.columns]
+    if missing_cols:
+        raise ValueError(f"Missing columns in predictions dataframe: {missing_cols}")
+
+    for col, weight in weights.items():
+        combined += predictions[col] * weight
+
+    return combined
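The docstring examples cover explicit weights; as a complement, a minimal sketch of the default path (`weights=None`), where every column receives weight 1.0 and the result is a plain row-wise sum:

    import pandas as pd
    from spotforecast2.processing.agg_predict import agg_predict

    preds = pd.DataFrame({"A": [1.0, 2.0], "B": [3.0, 4.0]})

    combined = agg_predict(preds)  # equal weights: 1.0 * A + 1.0 * B
    # 0    4.0
    # 1    6.0
    # dtype: float64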
spotforecast2-0.0.3/src/spotforecast2/processing/n2n_predict.py
@@ -0,0 +1,126 @@
+import pandas as pd
+from typing import List, Optional
+from spotforecast2.forecaster.recursive import ForecasterEquivalentDate
+from spotforecast2.data.fetch_data import fetch_data
+from spotforecast2.preprocessing.curate_data import basic_ts_checks
+from spotforecast2.preprocessing.curate_data import agg_and_resample_data
+from spotforecast2.preprocessing.outlier import mark_outliers
+
+from spotforecast2.preprocessing.split import split_rel_train_val_test
+from spotforecast2.forecaster.utils import predict_multivariate
+from spotforecast2.preprocessing.curate_data import get_start_end
+
+try:
+    from tqdm.auto import tqdm
+except ImportError:  # pragma: no cover - fallback when tqdm is not installed
+    tqdm = None
+
+
+def n2n_predict(
+    columns: Optional[List[str]] = None,
+    forecast_horizon: int = 24,
+    contamination: float = 0.01,
+    window_size: int = 72,
+    verbose: bool = True,
+    show_progress: bool = True,
+) -> pd.DataFrame:
+    """
+    End-to-end prediction function replicating the workflow from 01_base_predictor combined with fetch_data.
+
+    Args:
+        columns: List of target columns to forecast. If None, uses a default set (defined internally or from data).
+            Note: fetch_data supports None to return all columns.
+        forecast_horizon: Number of steps to forecast.
+        contamination: Contamination factor for outlier detection.
+        window_size: Window size for weighting (not fully utilized in main flow but kept for consistency).
+        verbose: Whether to print progress logs.
+        show_progress: Show progress bar during training and prediction.
+
+    Returns:
+        pd.DataFrame: The multi-output predictions.
+    """
+    if columns is not None:
+        TARGET = columns
+    else:
+        TARGET = None
+
+    if verbose:
+        print("--- Starting n2n_predict ---")
+        print("Fetching data...")
+
+    # Fetch data
+    data = fetch_data(columns=TARGET)
+
+    START, END, COV_START, COV_END = get_start_end(
+        data=data,
+        forecast_horizon=forecast_horizon,
+        verbose=verbose,
+    )
+
+    basic_ts_checks(data, verbose=verbose)
+
+    data = agg_and_resample_data(data, verbose=verbose)
+
+    # --- Outlier Handling ---
+    if verbose:
+        print("Handling outliers...")
+
+    # data_old = data.copy()  # kept in notebook, maybe useful for debugging but not used logic-wise here
+    data, outliers = mark_outliers(
+        data, contamination=contamination, random_state=1234, verbose=verbose
+    )
+
+    # --- Missing Data (Imputation) ---
+    if verbose:
+        print("Imputing missing data...")
+
+    missing_indices = data.index[data.isnull().any(axis=1)]
+    if verbose:
+        n_missing = len(missing_indices)
+        pct_missing = (n_missing / len(data)) * 100
+        print(f"Number of rows with missing values: {n_missing}")
+        print(f"Percentage of rows with missing values: {pct_missing:.2f}%")
+
+    data = data.ffill()
+    data = data.bfill()
+
+    # --- Train, Val, Test Split ---
+    if verbose:
+        print("Splitting data...")
+    data_train, data_val, data_test = split_rel_train_val_test(
+        data, perc_train=0.8, perc_val=0.2, verbose=verbose
+    )
+
+    # --- Model Fit ---
+    if verbose:
+        print("Fitting models...")
+
+    end_validation = pd.concat([data_train, data_val]).index[-1]
+
+    baseline_forecasters = {}
+
+    target_iter = data.columns
+    if show_progress and tqdm is not None:
+        target_iter = tqdm(data.columns, desc="Training forecasters", unit="model")
+
+    for target in target_iter:
+        forecaster = ForecasterEquivalentDate(offset=pd.DateOffset(days=1), n_offsets=1)
+
+        forecaster.fit(y=data.loc[:end_validation, target])
+
+        baseline_forecasters[target] = forecaster
+
+    if verbose:
+        print("✓ Multi-output baseline system trained")
+
+    # --- Predict ---
+    if verbose:
+        print("Generating predictions...")
+
+    predictions = predict_multivariate(
+        baseline_forecasters,
+        steps_ahead=forecast_horizon,
+        show_progress=show_progress,
+    )
+
+    return predictions
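A hedged end-to-end sketch combining the two new processing modules, assuming the data home directory already contains the expected default CSV (data_in.csv):

    from spotforecast2.processing.n2n_predict import n2n_predict
    from spotforecast2.processing.agg_predict import agg_predict

    # Forecast every column of the default dataset 24 steps ahead.
    predictions = n2n_predict(forecast_horizon=24, verbose=False, show_progress=False)

    # Collapse the per-target forecasts into one series (equal weights by default;
    # pass a dict or list of weights to combine targets differently).
    combined = agg_predict(predictions)
    print(combined.head())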