spotforecast2 0.0.1.tar.gz → 0.0.3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/PKG-INFO +1 -1
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/pyproject.toml +1 -1
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/data/fetch_data.py +6 -5
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/utils.py +17 -1
- spotforecast2-0.0.3/src/spotforecast2/processing/agg_predict.py +61 -0
- spotforecast2-0.0.3/src/spotforecast2/processing/n2n_predict.py +126 -0
- spotforecast2-0.0.3/src/spotforecast2/processing/n2n_predict_with_covariates.py +937 -0
- spotforecast2-0.0.3/src/spotforecast2/weather/__init__.py +5 -0
- spotforecast2-0.0.1/src/spotforecast2/weather/__init__.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/README.md +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/.DS_Store +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/__init__.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/data/__init__.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/data/data.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/exceptions.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/.DS_Store +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/__init__.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/base.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/metrics.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/recursive/__init__.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/recursive/_forecaster_equivalent_date.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/recursive/_forecaster_recursive.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/forecaster/recursive/_warnings.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/__init__.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/bayesian_search.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/grid_search.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/random_search.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/split_base.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/split_one_step.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/split_ts_cv.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/utils_common.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/utils_metrics.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/model_selection/validation.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/__init__.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/_binner.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/_common.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/_differentiator.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/_rolling.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/curate_data.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/imputation.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/outlier.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/preprocessing/split.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/py.typed +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/utils/__init__.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/utils/convert_to_utc.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/utils/data_transform.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/utils/forecaster_config.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/utils/generate_holiday.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/utils/validation.py +0 -0
- {spotforecast2-0.0.1 → spotforecast2-0.0.3}/src/spotforecast2/weather/weather_client.py +0 -0
--- spotforecast2-0.0.1/src/spotforecast2/data/fetch_data.py
+++ spotforecast2-0.0.3/src/spotforecast2/data/fetch_data.py
@@ -44,7 +44,7 @@ def get_data_home(data_home: Optional[Union[str, Path]] = None) -> Path:
 
 
 def fetch_data(
-    filename: str = "
+    filename: str = "data_in.csv",
     columns: Optional[list] = None,
     index_col: int = 0,
     parse_dates: bool = True,
@@ -56,8 +56,9 @@ def fetch_data(
     Args:
         filename (str):
            Filename of the CSV file containing the dataset. It must be located in the data home directory, which can be get or set using `get_data_home()`.
-        columns (list):
-            List of columns to be included in the dataset.
+        columns (list, optional):
+            List of columns to be included in the dataset. If None, all columns are included.
+            If an empty list is provided, a ValueError is blocked.
         index_col (int):
            Column index to be used as the index.
         parse_dates (bool):
@@ -71,7 +72,7 @@ def fetch_data(
         pd.DataFrame: The integrated raw dataset.
 
     Raises:
-        ValueError: If columns is
+        ValueError: If columns is an empty list.
 
     Examples:
         >>> from spotforecast2.data.fetch_data import fetch_data
@@ -79,7 +80,7 @@ def fetch_data(
        >>> data.head()
           Header1  Header2  Header3
    """
-    if columns is None
+    if columns is not None and len(columns) == 0:
        raise ValueError("columns must be specified and cannot be empty.")
 
    csv_path = get_data_home() / filename
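For context, the reworked `fetch_data` defaults could be exercised as below (a minimal sketch; it assumes `data_in.csv` already exists in the data home directory, and the column names are purely illustrative):

```python
from spotforecast2.data.fetch_data import fetch_data

# columns=None now means "load every column" of the default data_in.csv.
data = fetch_data(filename="data_in.csv", columns=None)

# A subset of columns can still be requested by name.
subset = fetch_data(columns=["Header1", "Header2"])

# An empty list is rejected explicitly in 0.0.3.
fetch_data(columns=[])  # ValueError: columns must be specified and cannot be empty.
```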
--- spotforecast2-0.0.1/src/spotforecast2/forecaster/utils.py
+++ spotforecast2-0.0.3/src/spotforecast2/forecaster/utils.py
@@ -20,6 +20,11 @@ from spotforecast2.utils import (
 )
 from spotforecast2.exceptions import set_skforecast_warnings, UnknownLevelWarning
 
+try:
+    from tqdm.auto import tqdm
+except ImportError:  # pragma: no cover - fallback when tqdm is not installed
+    tqdm = None
+
 
 def check_preprocess_series(series):
     pass
@@ -785,6 +790,7 @@ def predict_multivariate(
     forecasters: dict[str, Any],
     steps_ahead: int,
     exog: pd.DataFrame | None = None,
+    show_progress: bool = False,
 ) -> pd.DataFrame:
     """
     Generate multi-output predictions using multiple baseline forecasters.
@@ -796,6 +802,8 @@ def predict_multivariate(
         steps_ahead (int): Number of steps to forecast.
         exog (pd.DataFrame, optional): Exogenous variables for prediction.
            If provided, will be passed to each forecaster's predict method.
+        show_progress (bool, optional): Show progress bar while predicting
+            per target forecaster. Default: False.
 
     Returns:
         pd.DataFrame: DataFrame with predictions for all targets.
@@ -824,7 +832,15 @@ def predict_multivariate(
 
     predictions = {}
 
-    for target, forecaster in forecasters.items():
+    target_iter = forecasters.items()
+    if show_progress and tqdm is not None:
+        target_iter = tqdm(
+            forecasters.items(),
+            desc="Predicting targets",
+            unit="model",
+        )
+
+    for target, forecaster in target_iter:
         # Generate predictions for this target
         if exog is not None:
             pred = forecaster.predict(steps=steps_ahead, exog=exog)
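In practice the new flag is simply threaded through an existing call; a sketch assuming `fitted_forecasters` is a caller-built dict of per-target fitted forecasters:

```python
from spotforecast2.forecaster.utils import predict_multivariate

# fitted_forecasters: dict mapping target column name -> fitted forecaster (hypothetical).
predictions = predict_multivariate(
    fitted_forecasters,
    steps_ahead=24,
    show_progress=True,  # tqdm bar per target model if tqdm is installed, silently skipped otherwise
)
```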
--- /dev/null
+++ spotforecast2-0.0.3/src/spotforecast2/processing/agg_predict.py
@@ -0,0 +1,61 @@
+from typing import Dict, Optional, Union, List
+import pandas as pd
+import numpy as np
+
+
+def agg_predict(
+    predictions: pd.DataFrame,
+    weights: Optional[Union[Dict[str, float], List[float], np.ndarray]] = None,
+) -> pd.Series:
+    """Aggregates multiple prediction columns into a single combined prediction series.
+
+    The combination is a weighted sum of the prediction columns. If no weights are provided,
+    a default weighting scheme based on specific predefined columns is used.
+
+    Args:
+        predictions (pd.DataFrame): DataFrame containing the prediction columns.
+        weights (Optional[Union[Dict[str, float], List[float], np.ndarray]]):
+            Dictionary mapping column names to their weights, or a list/array of weights
+            corresponding to the order of columns in `predictions`.
+            If None, defaults to summing all columns (weight=1.0 for each column).
+
+    Returns:
+        pd.Series: A Series containing the aggregated values.
+
+    Raises:
+        ValueError: If a column specified in weights (or default weights) is missing from predictions.
+        ValueError: If weights is a list/array and its length does not match the number of columns in predictions.
+
+    Examples:
+        >>> df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
+        >>> agg_predict(df, weights={"A": 1.0, "B": -1.0})
+        0   -2.0
+        1   -2.0
+        dtype: float64
+        >>> agg_predict(df, weights=[0.5, 2.0])
+        0    6.5
+        1    9.0
+        dtype: float64
+    """
+    if weights is None:
+        # Default to summing all columns
+        weights = {col: 1.0 for col in predictions.columns}
+
+    if isinstance(weights, (list, np.ndarray)):
+        if len(weights) != len(predictions.columns):
+            raise ValueError(
+                f"Length of weights ({len(weights)}) does not match number of columns in predictions ({len(predictions.columns)})"
+            )
+        # Convert to dictionary using column order
+        weights = dict(zip(predictions.columns, weights))
+
+    combined = pd.Series(0.0, index=predictions.index)
+
+    missing_cols = [col for col in weights.keys() if col not in predictions.columns]
+    if missing_cols:
+        raise ValueError(f"Missing columns in predictions dataframe: {missing_cols}")
+
+    for col, weight in weights.items():
+        combined += predictions[col] * weight
+
+    return combined
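One plausible way to use the new helper is on top of the multi-target output from `predict_multivariate` above, with explicit per-column weights (the column names here are illustrative, not fixed by the package):

```python
from spotforecast2.processing.agg_predict import agg_predict

# Default: equal-weight sum over all prediction columns.
total = agg_predict(predictions)

# Explicit weights keyed by column name; keys must exist in `predictions`.
weighted = agg_predict(predictions, weights={"Header1": 0.7, "Header2": 0.3})
```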
--- /dev/null
+++ spotforecast2-0.0.3/src/spotforecast2/processing/n2n_predict.py
@@ -0,0 +1,126 @@
+import pandas as pd
+from typing import List, Optional
+from spotforecast2.forecaster.recursive import ForecasterEquivalentDate
+from spotforecast2.data.fetch_data import fetch_data
+from spotforecast2.preprocessing.curate_data import basic_ts_checks
+from spotforecast2.preprocessing.curate_data import agg_and_resample_data
+from spotforecast2.preprocessing.outlier import mark_outliers
+
+from spotforecast2.preprocessing.split import split_rel_train_val_test
+from spotforecast2.forecaster.utils import predict_multivariate
+from spotforecast2.preprocessing.curate_data import get_start_end
+
+try:
+    from tqdm.auto import tqdm
+except ImportError:  # pragma: no cover - fallback when tqdm is not installed
+    tqdm = None
+
+
+def n2n_predict(
+    columns: Optional[List[str]] = None,
+    forecast_horizon: int = 24,
+    contamination: float = 0.01,
+    window_size: int = 72,
+    verbose: bool = True,
+    show_progress: bool = True,
+) -> pd.DataFrame:
+    """
+    End-to-end prediction function replicating the workflow from 01_base_predictor combined with fetch_data.
+
+    Args:
+        columns: List of target columns to forecast. If None, uses a default set (defined internally or from data).
+            Note: fetch_data supports None to return all columns.
+        forecast_horizon: Number of steps to forecast.
+        contamination: Contamination factor for outlier detection.
+        window_size: Window size for weighting (not fully utilized in main flow but kept for consistency).
+        verbose: Whether to print progress logs.
+        show_progress: Show progress bar during training and prediction.
+
+    Returns:
+        pd.DataFrame: The multi-output predictions.
+    """
+    if columns is not None:
+        TARGET = columns
+    else:
+        TARGET = None
+
+    if verbose:
+        print("--- Starting n2n_predict ---")
+        print("Fetching data...")
+
+    # Fetch data
+    data = fetch_data(columns=TARGET)
+
+    START, END, COV_START, COV_END = get_start_end(
+        data=data,
+        forecast_horizon=forecast_horizon,
+        verbose=verbose,
+    )
+
+    basic_ts_checks(data, verbose=verbose)
+
+    data = agg_and_resample_data(data, verbose=verbose)
+
+    # --- Outlier Handling ---
+    if verbose:
+        print("Handling outliers...")
+
+    # data_old = data.copy() # kept in notebook, maybe useful for debugging but not used logic-wise here
+    data, outliers = mark_outliers(
+        data, contamination=contamination, random_state=1234, verbose=verbose
+    )
+
+    # --- Missing Data (Imputation) ---
+    if verbose:
+        print("Imputing missing data...")
+
+    missing_indices = data.index[data.isnull().any(axis=1)]
+    if verbose:
+        n_missing = len(missing_indices)
+        pct_missing = (n_missing / len(data)) * 100
+        print(f"Number of rows with missing values: {n_missing}")
+        print(f"Percentage of rows with missing values: {pct_missing:.2f}%")
+
+    data = data.ffill()
+    data = data.bfill()
+
+    # --- Train, Val, Test Split ---
+    if verbose:
+        print("Splitting data...")
+    data_train, data_val, data_test = split_rel_train_val_test(
+        data, perc_train=0.8, perc_val=0.2, verbose=verbose
+    )
+
+    # --- Model Fit ---
+    if verbose:
+        print("Fitting models...")
+
+    end_validation = pd.concat([data_train, data_val]).index[-1]
+
+    baseline_forecasters = {}
+
+    target_iter = data.columns
+    if show_progress and tqdm is not None:
+        target_iter = tqdm(data.columns, desc="Training forecasters", unit="model")
+
+    for target in target_iter:
+        forecaster = ForecasterEquivalentDate(offset=pd.DateOffset(days=1), n_offsets=1)
+
+        forecaster.fit(y=data.loc[:end_validation, target])
+
+        baseline_forecasters[target] = forecaster
+
+    if verbose:
+        print("✓ Multi-output baseline system trained")
+
+    # --- Predict ---
+    if verbose:
+        print("Generating predictions...")
+
+    predictions = predict_multivariate(
+        baseline_forecasters,
+        steps_ahead=forecast_horizon,
+        show_progress=show_progress,
+    )
+
+    return predictions
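End to end, the new entry point could then be driven like this (a sketch; it assumes the data home already holds the expected CSV and that the listed target columns exist in it):

```python
from spotforecast2.processing.n2n_predict import n2n_predict

# 24-step-ahead equivalent-date baseline for two illustrative targets.
predictions = n2n_predict(
    columns=["Header1", "Header2"],
    forecast_horizon=24,
    verbose=True,
    show_progress=True,
)
print(predictions.head())
```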