spotforecast2 0.0.1__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/PKG-INFO +1 -1
  2. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/pyproject.toml +1 -1
  3. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/data/fetch_data.py +6 -5
  4. spotforecast2-0.0.2/src/spotforecast2/processing/agg_predict.py +61 -0
  5. spotforecast2-0.0.2/src/spotforecast2/processing/n2n_predict.py +117 -0
  6. spotforecast2-0.0.1/src/spotforecast2/.DS_Store +0 -0
  7. spotforecast2-0.0.1/src/spotforecast2/forecaster/.DS_Store +0 -0
  8. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/README.md +0 -0
  9. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/__init__.py +0 -0
  10. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/data/__init__.py +0 -0
  11. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/data/data.py +0 -0
  12. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/exceptions.py +0 -0
  13. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/forecaster/__init__.py +0 -0
  14. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/forecaster/base.py +0 -0
  15. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/forecaster/metrics.py +0 -0
  16. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/forecaster/recursive/__init__.py +0 -0
  17. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/forecaster/recursive/_forecaster_equivalent_date.py +0 -0
  18. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/forecaster/recursive/_forecaster_recursive.py +0 -0
  19. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/forecaster/recursive/_warnings.py +0 -0
  20. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/forecaster/utils.py +0 -0
  21. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/model_selection/__init__.py +0 -0
  22. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/model_selection/bayesian_search.py +0 -0
  23. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/model_selection/grid_search.py +0 -0
  24. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/model_selection/random_search.py +0 -0
  25. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/model_selection/split_base.py +0 -0
  26. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/model_selection/split_one_step.py +0 -0
  27. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/model_selection/split_ts_cv.py +0 -0
  28. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/model_selection/utils_common.py +0 -0
  29. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/model_selection/utils_metrics.py +0 -0
  30. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/model_selection/validation.py +0 -0
  31. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/preprocessing/__init__.py +0 -0
  32. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/preprocessing/_binner.py +0 -0
  33. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/preprocessing/_common.py +0 -0
  34. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/preprocessing/_differentiator.py +0 -0
  35. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/preprocessing/_rolling.py +0 -0
  36. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/preprocessing/curate_data.py +0 -0
  37. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/preprocessing/imputation.py +0 -0
  38. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/preprocessing/outlier.py +0 -0
  39. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/preprocessing/split.py +0 -0
  40. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/py.typed +0 -0
  41. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/utils/__init__.py +0 -0
  42. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/utils/convert_to_utc.py +0 -0
  43. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/utils/data_transform.py +0 -0
  44. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/utils/forecaster_config.py +0 -0
  45. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/utils/generate_holiday.py +0 -0
  46. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/utils/validation.py +0 -0
  47. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/weather/__init__.py +0 -0
  48. {spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/weather/weather_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: spotforecast2
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Summary: Forecasting with spot
5
5
  Author: bartzbeielstein
6
6
  Author-email: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "spotforecast2"
3
- version = "0.0.1"
3
+ version = "0.0.2"
4
4
  description = "Forecasting with spot"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -44,7 +44,7 @@ def get_data_home(data_home: Optional[Union[str, Path]] = None) -> Path:
44
44
 
45
45
 
46
46
  def fetch_data(
47
- filename: str = "integrated_raw_data.csv",
47
+ filename: str = "data_in.csv",
48
48
  columns: Optional[list] = None,
49
49
  index_col: int = 0,
50
50
  parse_dates: bool = True,
@@ -56,8 +56,9 @@ def fetch_data(
56
56
  Args:
57
57
  filename (str):
58
58
  Filename of the CSV file containing the dataset. It must be located in the data home directory, which can be retrieved or set using `get_data_home()`.
59
- columns (list):
60
- List of columns to be included in the dataset. Must be specified.
59
+ columns (list, optional):
60
+ List of columns to be included in the dataset. If None, all columns are included.
61
+ If an empty list is provided, a ValueError is raised.
61
62
  index_col (int):
62
63
  Column index to be used as the index.
63
64
  parse_dates (bool):
@@ -71,7 +72,7 @@ def fetch_data(
71
72
  pd.DataFrame: The integrated raw dataset.
72
73
 
73
74
  Raises:
74
- ValueError: If columns is None or empty.
75
+ ValueError: If columns is an empty list.
75
76
 
76
77
  Examples:
77
78
  >>> from spotforecast2.data.fetch_data import fetch_data
@@ -79,7 +80,7 @@ def fetch_data(
79
80
  >>> data.head()
80
81
  Header1 Header2 Header3
81
82
  """
82
- if columns is None or len(columns) == 0:
83
+ if columns is not None and len(columns) == 0:
83
84
  raise ValueError("columns must be specified and cannot be empty.")
84
85
 
85
86
  csv_path = get_data_home() / filename
@@ -0,0 +1,61 @@
1
+ from typing import Dict, Optional, Union, List
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
+
6
+ def agg_predict(
7
+ predictions: pd.DataFrame,
8
+ weights: Optional[Union[Dict[str, float], List[float], np.ndarray]] = None,
9
+ ) -> pd.Series:
10
+ """Aggregates multiple prediction columns into a single combined prediction series.
11
+
12
+ The combination is a weighted sum of the prediction columns. If no weights are provided,
13
+ every column is given a weight of 1.0, i.e. all columns are simply summed.
14
+
15
+ Args:
16
+ predictions (pd.DataFrame): DataFrame containing the prediction columns.
17
+ weights (Optional[Union[Dict[str, float], List[float], np.ndarray]]):
18
+ Dictionary mapping column names to their weights, or a list/array of weights
19
+ corresponding to the order of columns in `predictions`.
20
+ If None, defaults to summing all columns (weight=1.0 for each column).
21
+
22
+ Returns:
23
+ pd.Series: A Series containing the aggregated values.
24
+
25
+ Raises:
26
+ ValueError: If a column named in a weights dictionary is missing from predictions.
27
+ ValueError: If weights is a list/array and its length does not match the number of columns in predictions.
28
+
29
+ Examples:
30
+ >>> df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
31
+ >>> agg_predict(df, weights={"A": 1.0, "B": -1.0})
32
+ 0 -2.0
33
+ 1 -2.0
34
+ dtype: float64
35
+ >>> agg_predict(df, weights=[0.5, 2.0])
36
+ 0 6.5
37
+ 1 9.0
38
+ dtype: float64
39
+ """
40
+ if weights is None:
41
+ # Default to summing all columns
42
+ weights = {col: 1.0 for col in predictions.columns}
43
+
44
+ if isinstance(weights, (list, np.ndarray)):
45
+ if len(weights) != len(predictions.columns):
46
+ raise ValueError(
47
+ f"Length of weights ({len(weights)}) does not match number of columns in predictions ({len(predictions.columns)})"
48
+ )
49
+ # Convert to dictionary using column order
50
+ weights = dict(zip(predictions.columns, weights))
51
+
52
+ combined = pd.Series(0.0, index=predictions.index)
53
+
54
+ missing_cols = [col for col in weights.keys() if col not in predictions.columns]
55
+ if missing_cols:
56
+ raise ValueError(f"Missing columns in predictions dataframe: {missing_cols}")
57
+
58
+ for col, weight in weights.items():
59
+ combined += predictions[col] * weight
60
+
61
+ return combined
@@ -0,0 +1,117 @@
1
+ import pandas as pd
2
+ from typing import List, Optional, Tuple, Dict, Any
3
+ from spotforecast2.forecaster.recursive import ForecasterEquivalentDate
4
+ from spotforecast2.data.fetch_data import fetch_data
5
+ from spotforecast2.preprocessing.curate_data import basic_ts_checks
6
+ from spotforecast2.preprocessing.curate_data import agg_and_resample_data
7
+ from spotforecast2.preprocessing.outlier import mark_outliers
8
+
9
+ from spotforecast2.preprocessing.split import split_rel_train_val_test
10
+ from spotforecast2.forecaster.utils import predict_multivariate
11
+ from spotforecast2.model_selection import TimeSeriesFold, backtesting_forecaster
12
+ from spotforecast2.preprocessing.curate_data import get_start_end
13
+
14
+
15
+ def n2n_predict(
16
+ columns: Optional[List[str]] = None,
17
+ forecast_horizon: int = 24,
18
+ contamination: float = 0.01,
19
+ window_size: int = 72,
20
+ verbose: bool = True,
21
+ ) -> Tuple[pd.DataFrame, Optional[Dict[str, Any]]]:
22
+ """
23
+ End-to-end prediction function replicating the workflow from 01_base_predictor combined with fetch_data.
24
+
25
+ Args:
26
+ columns: List of target columns to forecast. If None, all columns returned by `fetch_data` are forecast.
27
+ Note: fetch_data now supports None to return all columns.
28
+ forecast_horizon: Number of steps to forecast.
29
+ contamination: Contamination factor for outlier detection.
30
+ window_size: Window size for weighting (currently unused in this function; kept for interface consistency).
31
+ verbose: Whether to print progress logs.
32
+
33
+ Returns:
34
+ Tuple containing:
35
+ - predictions (pd.DataFrame): The multi-output predictions.
36
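+ - metrics (Optional[Dict]): Dictionary containing backtesting metrics if performed (note: the code shown performs no backtesting and returns only `predictions`, without a metrics element).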
37
+ """
38
+ if columns is not None:
39
+ TARGET = columns
40
+ else:
41
+ TARGET = None
42
+
43
+ if verbose:
44
+ print("--- Starting n2n_predict ---")
45
+ print("Fetching data...")
46
+
47
+ # Fetch data
48
+ data = fetch_data(columns=TARGET)
49
+
50
+ START, END, COV_START, COV_END = get_start_end(
51
+ data=data,
52
+ forecast_horizon=forecast_horizon,
53
+ verbose=verbose,
54
+ )
55
+
56
+ basic_ts_checks(data, verbose=verbose)
57
+
58
+ data = agg_and_resample_data(data, verbose=verbose)
59
+
60
+ # --- Outlier Handling ---
61
+ if verbose:
62
+ print("Handling outliers...")
63
+
64
+ # data_old = data.copy() # kept in notebook, maybe useful for debugging but not used logic-wise here
65
+ data, outliers = mark_outliers(
66
+ data, contamination=contamination, random_state=1234, verbose=verbose
67
+ )
68
+
69
+ # --- Missing Data (Imputation) ---
70
+ if verbose:
71
+ print("Imputing missing data...")
72
+
73
+ missing_indices = data.index[data.isnull().any(axis=1)]
74
+ if verbose:
75
+ n_missing = len(missing_indices)
76
+ pct_missing = (n_missing / len(data)) * 100
77
+ print(f"Number of rows with missing values: {n_missing}")
78
+ print(f"Percentage of rows with missing values: {pct_missing:.2f}%")
79
+
80
+ data = data.ffill()
81
+ data = data.bfill()
82
+
83
+ # --- Train, Val, Test Split ---
84
+ if verbose:
85
+ print("Splitting data...")
86
+ data_train, data_val, data_test = split_rel_train_val_test(
87
+ data, perc_train=0.8, perc_val=0.2, verbose=verbose
88
+ )
89
+
90
+ # --- Model Fit ---
91
+ if verbose:
92
+ print("Fitting models...")
93
+
94
+ end_validation = pd.concat([data_train, data_val]).index[-1]
95
+
96
+ baseline_forecasters = {}
97
+
98
+ for target in data.columns:
99
+ forecaster = ForecasterEquivalentDate(offset=pd.DateOffset(days=1), n_offsets=1)
100
+
101
+ forecaster.fit(y=data.loc[:end_validation, target])
102
+
103
+ baseline_forecasters[target] = forecaster
104
+
105
+ if verbose:
106
+ print("✓ Multi-output baseline system trained")
107
+
108
+
109
+ # --- Predict ---
110
+ if verbose:
111
+ print("Generating predictions...")
112
+
113
+ predictions = predict_multivariate(
114
+ baseline_forecasters, steps_ahead=forecast_horizon
115
+ )
116
+
117
+ return predictions
File without changes