PyPI - spotforecast2 - Versions diffs - 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl - Mend

spotforecast2 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

spotforecast2/data/__init__.py CHANGED Viewed

@@ -0,0 +1,15 @@
+from .fetch_data import (
+    get_data_home,
+    get_cache_home,
+    fetch_data,
+    fetch_holiday_data,
+    fetch_weather_data,
+)
+__all__ = [
+    "get_data_home",
+    "get_cache_home",
+    "fetch_data",
+    "fetch_holiday_data",
+    "fetch_weather_data",
+]

spotforecast2/data/fetch_data.py CHANGED Viewed

@@ -43,6 +43,61 @@ def get_data_home(data_home: Optional[Union[str, Path]] = None) -> Path:
     return data_home
+def get_cache_home(cache_home: Optional[Union[str, Path]] = None) -> Path:
+    """Return the location where persistent models are to be cached.
+    By default the cache directory is set to a folder named 'spotforecast2_cache' in the
+    user home folder. Alternatively, it can be set by the 'SPOTFORECAST2_CACHE' environment
+    variable or programmatically by giving an explicit folder path. The '~' symbol is
+    expanded to the user home folder. If the folder does not already exist, it is
+    automatically created.
+    This directory is used to store pickled trained models for quick reuse across
+    forecasting runs, following scikit-learn model persistence conventions.
+    Args:
+        cache_home (str or pathlib.Path, optional):
+            The path to spotforecast cache directory. If `None`, the default path
+            is `~/spotforecast2_cache`.
+    Returns:
+        pathlib.Path:
+            The path to the spotforecast cache directory.
+    Raises:
+        OSError: If the directory cannot be created due to permission issues.
+    Examples:
+        >>> from spotforecast2.data.fetch_data import get_cache_home
+        >>> cache_dir = get_cache_home()
+        >>> cache_dir.name
+        'spotforecast2_cache'
+        >>> # Custom cache location
+        >>> import tempfile
+        >>> from pathlib import Path
+        >>> custom_cache = get_cache_home(Path('/tmp/my_cache'))
+        >>> custom_cache.exists()
+        True
+        >>> # Using environment variable
+        >>> import os
+        >>> os.environ['SPOTFORECAST2_CACHE'] = '/var/cache/spotforecast2'
+        >>> cache_dir = get_cache_home()
+        >>> cache_dir.as_posix()
+        '/var/cache/spotforecast2'
+    """
+    if cache_home is None:
+        cache_home = environ.get(
+            "SPOTFORECAST2_CACHE", Path.home() / "spotforecast2_cache"
+        )
+    # Ensure cache_home is a Path() object pointing to an absolute path
+    cache_home = Path(cache_home).expanduser().absolute()
+    # Create cache directory if it does not exist
+    cache_home.mkdir(parents=True, exist_ok=True)
+    return cache_home
 def fetch_data(
     filename: Optional[str] = None,
     dataframe: Optional[pd.DataFrame] = None,
@@ -56,7 +111,7 @@ def fetch_data(
     Args:
         filename (str, optional):
-            Filename of the CSV file containing the dataset. Must be located in the
+            Filename of the CSV file containing the dataset. Must be located in the
             data home directory. If both filename and dataframe are None, defaults to "data_in.csv".
         dataframe (pd.DataFrame, optional):
             A pandas DataFrame to process. If provided, it will be processed with
@@ -87,13 +142,13 @@ def fetch_data(
         >>> data = fetch_data(columns=["col1", "col2"])
         >>> data.head()
                         Header1  Header2  Header3
         Load from specific CSV:
         >>> data = fetch_data(filename="custom_data.csv")
         Process a DataFrame:
         >>> import pandas as pd
-        >>> df = pd.DataFrame({"value": [1, 2, 3]},
+        >>> df = pd.DataFrame({"value": [1, 2, 3]},
         ...                   index=pd.date_range("2024-01-01", periods=3, freq="h"))
         >>> data = fetch_data(dataframe=df, timezone="Europe/Berlin")
         >>> data.index.tz
@@ -101,9 +156,11 @@ def fetch_data(
     """
     if columns is not None and len(columns) == 0:
         raise ValueError("columns must be specified and cannot be empty.")
     if filename is not None and dataframe is not None:
-        raise ValueError("Cannot specify both filename and dataframe. Please provide only one.")
+        raise ValueError(
+            "Cannot specify both filename and dataframe. Please provide only one."
+        )
     # Process DataFrame if provided
     if dataframe is not None:

spotforecast2/preprocessing/__init__.py CHANGED Viewed

@@ -6,7 +6,7 @@ from .curate_data import (
     agg_and_resample_data,
 )
 from .outlier import mark_outliers, manual_outlier_removal
-from .imputation import custom_weights, get_missing_weights
+from .imputation import custom_weights, get_missing_weights, WeightFunction
 from .split import split_abs_train_val_test, split_rel_train_val_test
 from ._differentiator import TimeSeriesDifferentiator
 from ._binner import QuantileBinner
@@ -22,6 +22,7 @@ __all__ = [
     "manual_outlier_removal",
     "custom_weights",
     "get_missing_weights",
+    "WeightFunction",
     "split_abs_train_val_test",
     "split_rel_train_val_test",
     "TimeSeriesDifferentiator",

spotforecast2/preprocessing/imputation.py CHANGED Viewed

@@ -1,4 +1,57 @@
 import pandas as pd
+from typing import Union
+import numpy as np
+class WeightFunction:
+    """Callable class for sample weights that can be pickled.
+    This class wraps the weights_series and provides a callable interface
+    compatible with ForecasterRecursive's weight_func parameter. Unlike
+    local functions with closures, instances of this class can be pickled
+    using standard pickle/joblib.
+    Args:
+        weights_series: Series containing weight values for each index.
+    Examples:
+        >>> import pandas as pd
+        >>> import pickle
+        >>> weights = pd.Series([1.0, 0.9, 0.8], index=[0, 1, 2])
+        >>> weight_func = WeightFunction(weights)
+        >>> weight_func(pd.Index([0, 1]))
+        array([1. , 0.9])
+        >>> # Can be pickled
+        >>> pickled = pickle.dumps(weight_func)
+        >>> unpickled = pickle.loads(pickled)
+        >>> unpickled(pd.Index([0, 1]))
+        array([1. , 0.9])
+    """
+    def __init__(self, weights_series: pd.Series):
+        """Initialize with a weights series.
+        Args:
+            weights_series: Series containing weight values for each index.
+        """
+        self.weights_series = weights_series
+    def __call__(
+        self, index: Union[pd.Index, np.ndarray, list]
+    ) -> Union[float, np.ndarray]:
+        """Return sample weights for given index.
+        Args:
+            index: Index or indices to get weights for.
+        Returns:
+            Weight value(s) corresponding to the index.
+        """
+        return custom_weights(index, self.weights_series)
+    def __repr__(self):
+        """String representation."""
+        return f"WeightFunction(weights_series with {len(self.weights_series)} entries)"
 def custom_weights(index, weights_series: pd.Series) -> float:

spotforecast2/processing/n2n_predict.py CHANGED Viewed

@@ -215,8 +215,8 @@ def n2n_predict(
     forecast_horizon: int = 24,
     contamination: float = 0.01,
     window_size: int = 72,
-    force_train: bool = False,
-    model_dir: Union[str, Path] = "./models_baseline",
+    force_train: bool = True,
+    model_dir: Optional[Union[str, Path]] = None,
     verbose: bool = True,
     show_progress: bool = True,
 ) -> Tuple[pd.DataFrame, Dict]:
@@ -231,7 +231,7 @@ def n2n_predict(
     6. Generates multi-step ahead predictions
     Models are persisted to disk following scikit-learn conventions using joblib.
-    Existing models are reused for prediction unless force_train=True.
+    By default, models are retrained (force_train=True). Set force_train=False to reuse existing cached models.
     Args:
         data: Optional DataFrame with target time series data. If None, fetches data automatically.
@@ -242,9 +242,8 @@ def n2n_predict(
         contamination: Contamination parameter for outlier detection. Default: 0.01.
         window_size: Rolling window size for gap detection. Default: 72.
         force_train: Force retraining of all models, ignoring cached models.
-            Default: False.
-        model_dir: Directory for saving/loading trained models.
-            Default: "./models_baseline".
+            Default: True.
+        model_dir: Directory for saving/loading trained models. If None, uses cache directory from get_cache_home(). Default: None (uses ~/spotforecast2_cache/forecasters).
         verbose: Print progress messages. Default: True.
         show_progress: Show progress bar during training and prediction. Default: True.
@@ -301,6 +300,8 @@ def n2n_predict(
           proceeds without retraining. This significantly speeds up prediction
           for repeated calls with the same configuration.
         - The model_dir directory is created automatically if it doesn't exist.
+        - Default model_dir uses get_cache_home() which respects the
+          SPOTFORECAST2_CACHE environment variable.
     Performance Notes:
         - First run: Full training (~2-5 minutes depending on data size)
@@ -315,6 +316,12 @@ def n2n_predict(
     if verbose:
         print("--- Starting n2n_predict ---")
+    # Set default model_dir if not provided
+    if model_dir is None:
+        from spotforecast2.data.fetch_data import get_cache_home
+        model_dir = get_cache_home() / "forecasters"
     # Handle data input - fetch_data handles both CSV and DataFrame
     if data is not None:
         if verbose:

spotforecast2/processing/n2n_predict_with_covariates.py CHANGED Viewed

@@ -85,7 +85,7 @@ from spotforecast2.preprocessing.curate_data import (
     curate_weather,
     get_start_end,
 )
-from spotforecast2.preprocessing.imputation import custom_weights, get_missing_weights
+from spotforecast2.preprocessing.imputation import get_missing_weights
 from spotforecast2.preprocessing.outlier import mark_outliers
 from spotforecast2.preprocessing.split import split_rel_train_val_test
@@ -742,8 +742,8 @@ def n2n_predict_with_covariates(
     include_weather_windows: bool = False,
     include_holiday_features: bool = False,
     include_poly_features: bool = False,
-    force_train: bool = False,
-    model_dir: Union[str, Path] = "./forecaster_models",
+    force_train: bool = True,
+    model_dir: Optional[Union[str, Path]] = None,
     verbose: bool = True,
     show_progress: bool = False,
 ) -> Tuple[pd.DataFrame, Dict, Dict]:
@@ -761,7 +761,7 @@ def n2n_predict_with_covariates(
     9. Generates multi-step ahead predictions
     Models are persisted to disk following scikit-learn conventions using joblib.
-    Existing models are reused for prediction unless force_train=True.
+    By default, models are retrained (force_train=True). Set force_train=False to reuse existing cached models.
     Args:
         data: Optional DataFrame with target time series data. If None, fetches data automatically.
@@ -782,9 +782,10 @@ def n2n_predict_with_covariates(
         include_holiday_features: Include holiday features. Default: False.
         include_poly_features: Include polynomial interaction features. Default: False.
         force_train: Force retraining of all models, ignoring cached models.
-            Default: False.
-        model_dir: Directory for saving/loading trained models.
-            Default: "./models_covariates".
+            Default: True.
+        model_dir: Directory for saving/loading trained models. If None, uses the
+            spotforecast2 cache directory (~/spotforecast2_cache by default, or
+            SPOTFORECAST2_CACHE environment variable). Default: None.
         verbose: Print progress messages. Default: True.
         show_progress: Show progress bar during training. Default: False.
@@ -850,12 +851,20 @@ def n2n_predict_with_covariates(
           proceeds without retraining. This significantly speeds up prediction
           for repeated calls with the same configuration.
         - The model_dir directory is created automatically if it doesn't exist.
+        - By default, models are cached in ~/spotforecast2_cache, which can be
+          customized via the SPOTFORECAST2_CACHE environment variable.
     Performance Notes:
         - First run: Full training (~5-10 minutes depending on data size)
         - Subsequent runs (force_train=False): Model loading only (~1-2 seconds)
         - Force retrain (force_train=True): Full training again (~5-10 minutes)
     """
+    # Set default model_dir if not provided
+    if model_dir is None:
+        from spotforecast2.data.fetch_data import get_cache_home
+        model_dir = get_cache_home() / "forecasters"
     if verbose:
         print("=" * 80)
         print("N2N Recursive Forecasting with Exogenous Covariates")
@@ -877,7 +886,7 @@ def n2n_predict_with_covariates(
         if verbose:
             print("  Using provided dataframe...")
         data = fetch_data(dataframe=data, timezone=timezone)
     target_columns = data.columns.tolist()
     if verbose:
@@ -921,13 +930,13 @@ def n2n_predict_with_covariates(
     # Invert missing_mask: True (missing) -> 0 (weight), False (valid) -> 1 (weight)
     weights_series = (~missing_mask).astype(float)
-    def weight_func(index):
-        """Return sample weights for given index."""
-        return custom_weights(index, weights_series)
+    # Use WeightFunction class which is picklable (unlike local functions with closures)
+    from spotforecast2.preprocessing import WeightFunction
+    weight_func = WeightFunction(weights_series)
-    # Note: weight_func is a local function and cannot be pickled.
-    # Model persistence is disabled when using weight_func.
-    use_model_persistence = False
+    # Model persistence enabled: WeightFunction instances can be pickled
+    use_model_persistence = True
     # ========================================================================
     # 4. EXOGENOUS FEATURES ENGINEERING

spotforecast2/utils/forecaster_config.py CHANGED Viewed

@@ -222,14 +222,18 @@ def initialize_weights(
             for key in weight_func:
                 try:
                     source_code_weight_func[key] = inspect.getsource(weight_func[key])
-                except OSError:
+                except (OSError, TypeError):
+                    # OSError: source not available, TypeError: callable class instance
                     source_code_weight_func[key] = (
                         f"<source unavailable: {weight_func[key]!r}>"
                     )
         else:
             try:
                 source_code_weight_func = inspect.getsource(weight_func)
-            except OSError:
+            except (OSError, TypeError):
+                # OSError: source not available (e.g., built-in, lambda in REPL)
+                # TypeError: callable class instance (e.g., WeightFunction)
+                # In these cases, we can't get source but the object can still be pickled
                 source_code_weight_func = f"<source unavailable: {weight_func!r}>"
         if "sample_weight" not in inspect.signature(estimator.fit).parameters:

{spotforecast2-0.2.2.dist-info → spotforecast2-0.2.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: spotforecast2
-Version: 0.2.2
+Version: 0.2.3
 Summary: Forecasting with spot
 Author: bartzbeielstein
 Author-email: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>

{spotforecast2-0.2.2.dist-info → spotforecast2-0.2.3.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
 spotforecast2/__init__.py,sha256=X9sBx15iz8yqr9iDJcrGJM5nhvnpaczXto4XV_GtfhE,59
-spotforecast2/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+spotforecast2/data/__init__.py,sha256=_AEH7sDHbeiDma7tn8XJQAiYxujzH6EkF4X9b8U0Xig,259
 spotforecast2/data/data.py,sha256=HEgr-FULaqHvuMeKTviOgYyo3GbxpGRTo3ZnmIU9w2Y,4422
-spotforecast2/data/fetch_data.py,sha256=N99W-NNTC2hbXmx1FofITsvXJfHj9py4r5Kllf5950Y,8464
+spotforecast2/data/fetch_data.py,sha256=37fKCWjRfc2bkfvIVBRU53ZIwsldrc0JUIOlj66duG4,10562
 spotforecast2/exceptions.py,sha256=6gOji-3cP-YAisPoxXCcrEEbjTnfPN1YqEhGYhmyZ8Y,20499
 spotforecast2/forecaster/__init__.py,sha256=BbCOS2ouKcPC9VzcdprllVyqlZIyAWXCOvUAiInxDi4,140
 spotforecast2/forecaster/base.py,sha256=rXhcjY4AMpyQhkpbtLIA8OOrGEb8fU57SQiyeR9c9DQ,16748
@@ -21,27 +21,27 @@ spotforecast2/model_selection/split_ts_cv.py,sha256=uwACVC5m-cRuCtpA5U46K-tdj0zm
 spotforecast2/model_selection/utils_common.py,sha256=HKDxm4pLwG0cqhE4t8bzNHFtRa6yn_O7b5ud-nx6b7E,31814
 spotforecast2/model_selection/utils_metrics.py,sha256=mMVKh03-yAvRjEnZlbg3CsktXNcHo7yiTkI5VMg5wQk,3842
 spotforecast2/model_selection/validation.py,sha256=nwZATc74tVb992HbefP_sAcJaz8ukV_uqjtVFXaySxs,30038
-spotforecast2/preprocessing/__init__.py,sha256=Jk1RJRbPkggw70h4Lay4FY7yQHN9_tjRxzp9QJcF3Oo,828
+spotforecast2/preprocessing/__init__.py,sha256=87koxOzPfn3ueVaIgx6u36gNBh27YRGPIVYwLcF6HGg,866
 spotforecast2/preprocessing/_binner.py,sha256=EYBOwNSOW85bdLUgQ_qLSq8xpujWJezWkNTIL1jNaYo,13723
 spotforecast2/preprocessing/_common.py,sha256=aP8EIYIg3iBXnijXByHedGEdcubXu-ciRtEgqdDfO_8,3141
 spotforecast2/preprocessing/_differentiator.py,sha256=otka_TO1edM3zgp16zOjeSKxa61arbmPPsr96_GfgLI,4646
 spotforecast2/preprocessing/_rolling.py,sha256=_BUG_aHbOI-1e2ku8AwsJJGl3akTBWjRju2PhclkXso,4202
 spotforecast2/preprocessing/curate_data.py,sha256=4VV8aYwShyrUc9lqWVx_ckIH-moK0B8ONEMb2i463ag,9603
-spotforecast2/preprocessing/imputation.py,sha256=lmH-HumI_QLLm9aMESe_oZq84Axn60woLaMqd_Abw3k,3509
+spotforecast2/preprocessing/imputation.py,sha256=wXHXcIwWb7_XqW9JdBjaRA7NxWhbKWoQyW5z0KkPLd8,5201
 spotforecast2/preprocessing/outlier.py,sha256=jZxAR870QtYner7b4gXk6LLGJw0juLq1VU4CGklYd3c,4208
 spotforecast2/preprocessing/split.py,sha256=mzzt5ltUZdVzfWtBBTQjp8E2MyqVdWUFtz7nN11urbU,5011
 spotforecast2/processing/agg_predict.py,sha256=VKlruB0x-eJKokkHyJxR87rZ4m53si3ODbrd0ibPlow,2378
-spotforecast2/processing/n2n_predict.py,sha256=Sr2AFaCZxP-tbsxlEjiSdjBU-mtBiDa_f6rJLEJov64,14912
-spotforecast2/processing/n2n_predict_with_covariates.py,sha256=PyB3X1rNb18JBC72YiN12hUg5eSjUAsW4M-atczmCSQ,40914
+spotforecast2/processing/n2n_predict.py,sha256=NZku7xnt9ZLu4V9FMlfbDmU2rzvQPXFYyhvdu2WRtlk,15324
+spotforecast2/processing/n2n_predict_with_covariates.py,sha256=20bHmODXzb2CRSXjxtsqTtKuJ-1_zjo1RKQKjmygYyw,41399
 spotforecast2/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 spotforecast2/utils/__init__.py,sha256=NrMt_xJLe4rbTFbsbgSQYeREohEOiYG5S-97e6Jj07I,1018
 spotforecast2/utils/convert_to_utc.py,sha256=hz8mJUHK9jDLUiN5LdNX5l3KZuOKlklyycB4zFdB9Ng,1405
 spotforecast2/utils/data_transform.py,sha256=PhLeZoimM0TLfp34Fp56dQrxlCYNWGVU8h8RZHdZSlo,7294
-spotforecast2/utils/forecaster_config.py,sha256=0jchk_9tjxzttN8btWlRBfAjT2bz27JO4CDrpPsC58E,12875
+spotforecast2/utils/forecaster_config.py,sha256=qnpgH97u8ffD3rIgSXyNDl48lgm5FeWplKwrK5tKOJ4,13236
 spotforecast2/utils/generate_holiday.py,sha256=SHaPvPMt-abis95cChHf5ObyPwCTrzJ87bxffeqZLRc,2707
 spotforecast2/utils/validation.py,sha256=x9ypQzcneDhWJA_piiY4Q3_ogoGd1LTsZ7__MFeG9Fc,21618
 spotforecast2/weather/__init__.py,sha256=1Jco88pl0deNESgNATin83Nf5i9c58pxN7G-vNiOiu0,120
 spotforecast2/weather/weather_client.py,sha256=Ec_ywug6uoa71MfXM8RNbXEvtBtBzr-SUS5xq_HKtZE,9837
-spotforecast2-0.2.2.dist-info/WHEEL,sha256=5DEXXimM34_d4Gx1AuF9ysMr1_maoEtGKjaILM3s4w4,80
-spotforecast2-0.2.2.dist-info/METADATA,sha256=f5BfMpKyfzwbTOTguKeNPgjbuEu2N0zwMrfcjG82XYo,3481
-spotforecast2-0.2.2.dist-info/RECORD,,
+spotforecast2-0.2.3.dist-info/WHEEL,sha256=5DEXXimM34_d4Gx1AuF9ysMr1_maoEtGKjaILM3s4w4,80
+spotforecast2-0.2.3.dist-info/METADATA,sha256=nsr5BzvCVIwKXeRDsPVgpSuEwcQ_-KTm3T72Yz_7tYY,3481
+spotforecast2-0.2.3.dist-info/RECORD,,

{spotforecast2-0.2.2.dist-info → spotforecast2-0.2.3.dist-info}/WHEEL RENAMED Viewed

File without changes

spotforecast2 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

spotforecast2 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl