PyPI - spotforecast2 - Versions diffs - 0.0.1__tar.gz → 0.0.2__tar.gz - Mend

spotforecast2 0.0.1tar.gz → 0.0.2tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

{spotforecast2-0.0.1 → spotforecast2-0.0.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: spotforecast2
-Version: 0.0.1
+Version: 0.0.2
 Summary: Forecasting with spot
 Author: bartzbeielstein
 Author-email: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>

{spotforecast2-0.0.1 → spotforecast2-0.0.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "spotforecast2"
-version = "0.0.1"
+version = "0.0.2"
 description = "Forecasting with spot"
 readme = "README.md"
 authors = [

{spotforecast2-0.0.1 → spotforecast2-0.0.2}/src/spotforecast2/data/fetch_data.py RENAMED Viewed

@@ -44,7 +44,7 @@ def get_data_home(data_home: Optional[Union[str, Path]] = None) -> Path:
 def fetch_data(
-    filename: str = "integrated_raw_data.csv",
+    filename: str = "data_in.csv",
     columns: Optional[list] = None,
     index_col: int = 0,
     parse_dates: bool = True,
@@ -56,8 +56,9 @@ def fetch_data(
     Args:
         filename (str):
             Filename of the CSV file containing the dataset. It must be located in the data home directory, which can be retrieved or set using `get_data_home()`.
-        columns (list):
-            List of columns to be included in the dataset. Must be specified.
+        columns (list, optional):
+            List of columns to be included in the dataset. If None, all columns are included.
+            If an empty list is provided, a ValueError is raised.
         index_col (int):
             Column index to be used as the index.
         parse_dates (bool):
@@ -71,7 +72,7 @@ def fetch_data(
         pd.DataFrame: The integrated raw dataset.
     Raises:
-        ValueError: If columns is None or empty.
+        ValueError: If columns is an empty list.
     Examples:
         >>> from spotforecast2.data.fetch_data import fetch_data
@@ -79,7 +80,7 @@ def fetch_data(
         >>> data.head()
                         Header1  Header2  Header3
     """
-    if columns is None or len(columns) == 0:
+    if columns is not None and len(columns) == 0:
         raise ValueError("columns must be specified and cannot be empty.")
     csv_path = get_data_home() / filename

spotforecast2-0.0.2/src/spotforecast2/processing/agg_predict.py ADDED Viewed

@@ -0,0 +1,61 @@
+from typing import Dict, Optional, Union, List
+import pandas as pd
+import numpy as np
+def agg_predict(
+    predictions: pd.DataFrame,
+    weights: Optional[Union[Dict[str, float], List[float], np.ndarray]] = None,
+) -> pd.Series:
+    """Aggregates multiple prediction columns into a single combined prediction series.
+    The combination is a weighted sum of the prediction columns. If no weights are provided,
+    all columns are summed with a weight of 1.0 each.
+    Args:
+        predictions (pd.DataFrame): DataFrame containing the prediction columns.
+        weights (Optional[Union[Dict[str, float], List[float], np.ndarray]]):
+            Dictionary mapping column names to their weights, or a list/array of weights
+            corresponding to the order of columns in `predictions`.
+            If None, defaults to summing all columns (weight=1.0 for each column).
+    Returns:
+        pd.Series: A Series containing the aggregated values.
+    Raises:
+        ValueError: If a column specified in weights is missing from predictions.
+        ValueError: If weights is a list/array and its length does not match the number of columns in predictions.
+    Examples:
+        >>> df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
+        >>> agg_predict(df, weights={"A": 1.0, "B": -1.0})
+        0   -2.0
+        1   -2.0
+        dtype: float64
+        >>> agg_predict(df, weights=[0.5, 2.0])
+        0    6.5
+        1    9.0
+        dtype: float64
+    """
+    if weights is None:
+        # Default to summing all columns
+        weights = {col: 1.0 for col in predictions.columns}
+    if isinstance(weights, (list, np.ndarray)):
+        if len(weights) != len(predictions.columns):
+            raise ValueError(
+                f"Length of weights ({len(weights)}) does not match number of columns in predictions ({len(predictions.columns)})"
+            )
+        # Convert to dictionary using column order
+        weights = dict(zip(predictions.columns, weights))
+    combined = pd.Series(0.0, index=predictions.index)
+    missing_cols = [col for col in weights.keys() if col not in predictions.columns]
+    if missing_cols:
+        raise ValueError(f"Missing columns in predictions dataframe: {missing_cols}")
+    for col, weight in weights.items():
+        combined += predictions[col] * weight
+    return combined

spotforecast2-0.0.2/src/spotforecast2/processing/n2n_predict.py ADDED Viewed

@@ -0,0 +1,117 @@
+import pandas as pd
+from typing import List, Optional, Tuple, Dict, Any
+from spotforecast2.forecaster.recursive import ForecasterEquivalentDate
+from spotforecast2.data.fetch_data import fetch_data
+from spotforecast2.preprocessing.curate_data import basic_ts_checks
+from spotforecast2.preprocessing.curate_data import agg_and_resample_data
+from spotforecast2.preprocessing.outlier import mark_outliers
+from spotforecast2.preprocessing.split import split_rel_train_val_test
+from spotforecast2.forecaster.utils import predict_multivariate
+from spotforecast2.model_selection import TimeSeriesFold, backtesting_forecaster
+from spotforecast2.preprocessing.curate_data import get_start_end
+def n2n_predict(
+    columns: Optional[List[str]] = None,
+    forecast_horizon: int = 24,
+    contamination: float = 0.01,
+    window_size: int = 72,
+    verbose: bool = True,
+) -> Tuple[pd.DataFrame, Optional[Dict[str, Any]]]:
+    """
+    End-to-end prediction function replicating the workflow from 01_base_predictor combined with fetch_data.
+    Args:
+        columns: List of target columns to forecast. If None, no column filter is passed to
+                 `fetch_data`, which then loads all columns.
+        forecast_horizon: Number of steps to forecast.
+        contamination: Contamination factor for outlier detection.
+        window_size: Window size for weighting (currently unused in this function; kept for consistency).
+        verbose: Whether to print progress logs.
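+    Returns:
+        Tuple containing:
+            - predictions (pd.DataFrame): The multi-output predictions.
+            - metrics (Optional[Dict]): Dictionary containing backtesting metrics if performed.
+        Note: the current implementation returns only the predictions DataFrame; no backtesting metrics are computed.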
+    """
+    if columns is not None:
+        TARGET = columns
+    else:
+        TARGET = None
+    if verbose:
+        print("--- Starting n2n_predict ---")
+        print("Fetching data...")
+    # Fetch data
+    data = fetch_data(columns=TARGET)
+    START, END, COV_START, COV_END = get_start_end(
+        data=data,
+        forecast_horizon=forecast_horizon,
+        verbose=verbose,
+    )
+    basic_ts_checks(data, verbose=verbose)
+    data = agg_and_resample_data(data, verbose=verbose)
+    # --- Outlier Handling ---
+    if verbose:
+        print("Handling outliers...")
+    # data_old = data.copy() # kept in notebook, maybe useful for debugging but not used logic-wise here
+    data, outliers = mark_outliers(
+        data, contamination=contamination, random_state=1234, verbose=verbose
+    )
+    # --- Missing Data (Imputation) ---
+    if verbose:
+        print("Imputing missing data...")
+    missing_indices = data.index[data.isnull().any(axis=1)]
+    if verbose:
+        n_missing = len(missing_indices)
+        pct_missing = (n_missing / len(data)) * 100
+        print(f"Number of rows with missing values: {n_missing}")
+        print(f"Percentage of rows with missing values: {pct_missing:.2f}%")
+    data = data.ffill()
+    data = data.bfill()
+    # --- Train, Val, Test Split ---
+    if verbose:
+        print("Splitting data...")
+    data_train, data_val, data_test = split_rel_train_val_test(
+        data, perc_train=0.8, perc_val=0.2, verbose=verbose
+    )
+    # --- Model Fit ---
+    if verbose:
+        print("Fitting models...")
+    end_validation = pd.concat([data_train, data_val]).index[-1]
+    baseline_forecasters = {}
+    for target in data.columns:
+        forecaster = ForecasterEquivalentDate(offset=pd.DateOffset(days=1), n_offsets=1)
+        forecaster.fit(y=data.loc[:end_validation, target])
+        baseline_forecasters[target] = forecaster
+    if verbose:
+        print("✓ Multi-output baseline system trained")
+    # --- Predict ---
+    if verbose:
+        print("Generating predictions...")
+    predictions = predict_multivariate(
+        baseline_forecasters, steps_ahead=forecast_horizon
+    )
+    return predictions