PyPI - spotforecast2 - Versions diffs - 0.0.1__py3-none-any.whl - Mend

spotforecast2 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

spotforecast2/.DS_Store +0 -0
spotforecast2/__init__.py +2 -0
spotforecast2/data/__init__.py +0 -0
spotforecast2/data/data.py +130 -0
spotforecast2/data/fetch_data.py +209 -0
spotforecast2/exceptions.py +681 -0
spotforecast2/forecaster/.DS_Store +0 -0
spotforecast2/forecaster/__init__.py +7 -0
spotforecast2/forecaster/base.py +448 -0
spotforecast2/forecaster/metrics.py +527 -0
spotforecast2/forecaster/recursive/__init__.py +4 -0
spotforecast2/forecaster/recursive/_forecaster_equivalent_date.py +1075 -0
spotforecast2/forecaster/recursive/_forecaster_recursive.py +939 -0
spotforecast2/forecaster/recursive/_warnings.py +15 -0
spotforecast2/forecaster/utils.py +954 -0
spotforecast2/model_selection/__init__.py +5 -0
spotforecast2/model_selection/bayesian_search.py +453 -0
spotforecast2/model_selection/grid_search.py +314 -0
spotforecast2/model_selection/random_search.py +151 -0
spotforecast2/model_selection/split_base.py +357 -0
spotforecast2/model_selection/split_one_step.py +245 -0
spotforecast2/model_selection/split_ts_cv.py +634 -0
spotforecast2/model_selection/utils_common.py +718 -0
spotforecast2/model_selection/utils_metrics.py +103 -0
spotforecast2/model_selection/validation.py +685 -0
spotforecast2/preprocessing/__init__.py +30 -0
spotforecast2/preprocessing/_binner.py +378 -0
spotforecast2/preprocessing/_common.py +123 -0
spotforecast2/preprocessing/_differentiator.py +123 -0
spotforecast2/preprocessing/_rolling.py +136 -0
spotforecast2/preprocessing/curate_data.py +254 -0
spotforecast2/preprocessing/imputation.py +92 -0
spotforecast2/preprocessing/outlier.py +114 -0
spotforecast2/preprocessing/split.py +139 -0
spotforecast2/py.typed +0 -0
spotforecast2/utils/__init__.py +43 -0
spotforecast2/utils/convert_to_utc.py +44 -0
spotforecast2/utils/data_transform.py +208 -0
spotforecast2/utils/forecaster_config.py +344 -0
spotforecast2/utils/generate_holiday.py +70 -0
spotforecast2/utils/validation.py +569 -0
spotforecast2/weather/__init__.py +0 -0
spotforecast2/weather/weather_client.py +288 -0
spotforecast2-0.0.1.dist-info/METADATA +47 -0
spotforecast2-0.0.1.dist-info/RECORD +46 -0
spotforecast2-0.0.1.dist-info/WHEEL +4 -0

spotforecast2/.DS_Store ADDED Viewed

Binary file

spotforecast2/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ def hello() -> str:  # Return the package's fixed greeting string.
2	+     return "Hello from spotforecast2!"

spotforecast2/data/__init__.py ADDED Viewed

File without changes

spotforecast2/data/data.py ADDED Viewed

@@ -0,0 +1,130 @@
+"""Data structures for input and processed data."""
+from dataclasses import dataclass
+from pathlib import Path
+from typing import List, Optional
+import pandas as pd
+from spotforecast2.utils.convert_to_utc import convert_to_utc
+@dataclass
+class Data:
+    """Container for input time series data.
+
+    Attributes:
+        data: pandas DataFrame containing the input time series data.
+    """
+
+    data: pd.DataFrame
+
+    @staticmethod
+    def _resolve_index_label(csv_path: Path, index_col, read_csv_kwargs: dict):
+        """Return the label of the index column, or None if it cannot be named.
+
+        A string ``index_col`` already is the label. An integer position is
+        translated by reading only the header row of the CSV. Any other value
+        (``None``, ``False``, sequences, ...) is left unresolved.
+        """
+        if isinstance(index_col, str):
+            return index_col
+        # bool is a subclass of int; ``False`` means "no index column" to
+        # pandas, so it must not be treated as position 0.
+        if isinstance(index_col, bool) or not isinstance(index_col, int):
+            return None
+        # The header has to be parsed with the same dialect as the real read,
+        # otherwise e.g. a ';'-separated file yields one single bogus column.
+        # Only options that influence how the header row is located/decoded
+        # are forwarded; row-oriented options could conflict with ``nrows``.
+        header_options = (
+            "sep",
+            "delimiter",
+            "dialect",
+            "header",
+            "names",
+            "skiprows",
+            "skipinitialspace",
+            "comment",
+            "quotechar",
+            "quoting",
+            "escapechar",
+            "encoding",
+            "encoding_errors",
+            "compression",
+            "engine",
+            "storage_options",
+        )
+        header_kwargs = {
+            key: value
+            for key, value in read_csv_kwargs.items()
+            if key in header_options
+        }
+        header_df = pd.read_csv(csv_path, nrows=0, **header_kwargs)
+        return header_df.columns[index_col]
+
+    @staticmethod
+    def _attach_inferred_freq(df: pd.DataFrame) -> pd.DataFrame:
+        """Set ``df.index.freq`` from ``pd.infer_freq`` when it is missing.
+
+        This is best effort: if the frequency cannot be inferred or assigned
+        (``ValueError``/``TypeError``), the index is left without a frequency.
+        The frame is modified in place and returned for convenience.
+        """
+        if df.index.freq is None:
+            try:
+                df.index.freq = pd.infer_freq(df.index)
+            except (ValueError, TypeError):
+                # Irregular or too-short indexes simply stay without a freq.
+                pass
+        return df
+
+    @classmethod
+    def from_csv(
+        cls,
+        csv_path: Path,
+        timezone: Optional[str],
+        columns: Optional[List[str]] = None,
+        parse_dates=True,
+        index_col=0,
+        **kwargs,
+    ) -> "Data":
+        """Load data from a CSV file.
+
+        The CSV is read with ``pd.read_csv`` and the resulting frame is handed
+        to ``convert_to_utc`` together with ``timezone`` (that helper is
+        expected to localize a naive index and convert it to UTC; see its own
+        documentation for the exact contract). Finally the index frequency is
+        inferred when pandas did not set one.
+
+        Args:
+            csv_path (Path): Path to the CSV file.
+            timezone (Optional[str]): Timezone forwarded to ``convert_to_utc``.
+            columns (Optional[List[str]]): Column names to load. If provided,
+                only these columns plus the index column are read from the
+                CSV. If None, all columns are loaded.
+            parse_dates (bool or list, optional): Passed to ``pd.read_csv``.
+                Defaults to True.
+            index_col (int or str, optional): Column to use as index, given
+                as a position in the file or as a label. Defaults to 0.
+            **kwargs (Any): Additional keyword arguments forwarded to
+                ``pd.read_csv``. Dialect-related options (``sep``,
+                ``encoding``, ``skiprows``, ...) are also honoured when the
+                header is inspected to resolve a positional ``index_col``.
+
+        Returns:
+            Data: Instance containing the loaded DataFrame.
+
+        Raises:
+            ValueError: Propagated from ``convert_to_utc`` (documented there
+                for a non-datetime index or a naive index without timezone).
+            IndexError: If a positional ``index_col`` is outside the header.
+
+        Examples:
+            >>> data = Data.from_csv(
+            ...     Path("data.csv"),
+            ...     timezone="UTC",
+            ...     columns=["target_col"]
+            ... )
+        """
+        usecols = None
+        if columns is not None:
+            index_label = cls._resolve_index_label(csv_path, index_col, kwargs)
+            if index_label is None:
+                usecols = list(columns)
+            else:
+                # dict.fromkeys removes duplicates (the index column may also
+                # be listed in ``columns``) while preserving the order.
+                usecols = list(dict.fromkeys([index_label, *columns]))
+                # Once ``usecols`` filters the file, a position no longer
+                # reliably points at the same column, so address it by label.
+                index_col = index_label
+        df = pd.read_csv(
+            csv_path,
+            parse_dates=parse_dates,
+            index_col=index_col,
+            usecols=usecols,
+            **kwargs,
+        )
+        df = convert_to_utc(df, timezone)
+        return cls(data=cls._attach_inferred_freq(df))
+
+    @classmethod
+    def from_dataframe(
+        cls,
+        df: pd.DataFrame,
+        timezone: Optional[str],
+        columns: Optional[List[str]] = None,
+    ) -> "Data":
+        """Create a new Data instance from an existing DataFrame.
+
+        The frame is first passed through ``convert_to_utc`` with
+        ``timezone`` (expected to localize a naive index and convert it to
+        UTC; see that helper for the exact contract), then optionally reduced
+        to ``columns``, and finally the index frequency is inferred when it
+        is not set.
+
+        Args:
+            df (pd.DataFrame): Input DataFrame containing data.
+            timezone (Optional[str]): Timezone forwarded to ``convert_to_utc``.
+            columns (Optional[List[str]]): List of column names to include.
+                If provided, a copy containing only these columns is used.
+                If None, all columns are used.
+
+        Returns:
+            Data: Instance containing the processed DataFrame.
+
+        Raises:
+            ValueError: Propagated from ``convert_to_utc`` (documented there
+                for a non-datetime index or a naive index without timezone).
+            KeyError: If a requested column is not present in the DataFrame.
+        """
+        df = convert_to_utc(df, timezone)
+        if columns is not None:
+            df = df[columns].copy()
+        return cls(data=cls._attach_inferred_freq(df))

spotforecast2/data/fetch_data.py ADDED Viewed

@@ -0,0 +1,209 @@
+import pandas as pd
+from spotforecast2.data.data import Data
+from pathlib import Path
+from os import environ
+from typing import Optional, Union
+from spotforecast2.utils.generate_holiday import create_holiday_df
+from pandas import Timestamp
+from spotforecast2.weather.weather_client import WeatherService
+def get_data_home(data_home: Optional[Union[str, Path]] = None) -> Path:
+    """Return the location where datasets are to be stored.
+
+    The directory is resolved in this order:
+
+    1. the explicit ``data_home`` argument,
+    2. the ``SPOTFORECAST2_DATA`` environment variable (ignored when it is
+       empty or only whitespace),
+    3. a folder named ``spotforecast2_data`` in the user home folder.
+
+    A leading ``~`` is expanded to the user home folder and the result is
+    made absolute. If the folder does not already exist, it is created
+    (including missing parents).
+
+    Args:
+        data_home (str or pathlib.Path, optional):
+            The path to the spotforecast data directory. If `None`, the
+            environment variable or the default `~/spotforecast2_data` is used.
+
+    Returns:
+        data_home (pathlib.Path):
+            The absolute path to the spotforecast data directory.
+
+    Examples:
+        >>> from pathlib import Path
+        >>> get_data_home()
+        PosixPath('/home/user/spotforecast2_data')
+        >>> get_data_home(Path('/tmp/spotforecast2_data'))
+        PosixPath('/tmp/spotforecast2_data')
+    """
+    if data_home is None:
+        env_value = environ.get("SPOTFORECAST2_DATA")
+        # A blank variable is treated as unset; otherwise Path("") would
+        # silently resolve to the current working directory.
+        if env_value and env_value.strip():
+            data_home = env_value
+    if data_home is None:
+        # The home directory is only looked up when it is actually needed.
+        data_home = Path.home() / "spotforecast2_data"
+    # Ensure data_home is a Path() object pointing to an absolute path
+    data_home = Path(data_home).expanduser().absolute()
+    # Create the data directory if it does not exist.
+    data_home.mkdir(parents=True, exist_ok=True)
+    return data_home
+def fetch_data(
+    filename: str = "integrated_raw_data.csv",
+    columns: Optional[list] = None,
+    index_col: int = 0,
+    parse_dates: bool = True,
+    dayfirst: bool = False,
+    timezone: str = "UTC",
+) -> pd.DataFrame:
+    """Load selected columns of a CSV dataset from the data home directory.
+
+    The file is looked up inside the directory returned by
+    `get_data_home()` and read through `Data.from_csv`; the DataFrame held
+    by the resulting `Data` instance is returned.
+
+    Args:
+        filename (str):
+            Name of the CSV file, relative to the data home directory.
+        columns (list):
+            Columns to include in the dataset. Must be a non-empty list.
+        index_col (int):
+            Position of the column to use as the index.
+        parse_dates (bool):
+            Whether dates should be parsed while reading the CSV.
+        dayfirst (bool):
+            Whether the day comes first in date parsing.
+        timezone (str):
+            Timezone forwarded to `Data.from_csv`.
+
+    Returns:
+        pd.DataFrame: The loaded dataset.
+
+    Raises:
+        ValueError: If columns is None or empty.
+        FileNotFoundError: If the file is not present in the data home.
+
+    Examples:
+        >>> from spotforecast2.data.fetch_data import fetch_data
+        >>> data = fetch_data(columns=["col1", "col2"])
+        >>> data.head()
+    """
+    # Validate the request before touching the filesystem.
+    if columns is None or not len(columns):
+        raise ValueError("columns must be specified and cannot be empty.")
+
+    csv_path = get_data_home() / filename
+    if not csv_path.is_file():
+        raise FileNotFoundError(f"The file {csv_path} does not exist.")
+
+    # ``dayfirst`` is not a named parameter of ``from_csv``; it travels
+    # through its ``**kwargs`` down to ``pd.read_csv``.
+    reader_options = {
+        "csv_path": csv_path,
+        "index_col": index_col,
+        "parse_dates": parse_dates,
+        "dayfirst": dayfirst,
+        "timezone": timezone,
+        "columns": columns,
+    }
+    return Data.from_csv(**reader_options).data
+def fetch_holiday_data(
+    start: str | Timestamp,
+    end: str | Timestamp,
+    tz: str = "UTC",
+    freq: str = "h",
+    country_code: str = "DE",
+    state: str = "NW",
+) -> pd.DataFrame:
+    """Build the holiday DataFrame for the dataset period.
+
+    This is a thin convenience wrapper: every argument is forwarded
+    unchanged to `create_holiday_df`, whose result is returned as is.
+
+    Args:
+        start (str or pd.Timestamp):
+            Start date of the dataset period.
+        end (str or pd.Timestamp):
+            End date of the dataset period.
+        tz (str):
+            Timezone for the holiday data.
+        freq (str):
+            Frequency of the holiday data.
+        country_code (str):
+            Country code for the holidays.
+        state (str):
+            State code for the holidays.
+
+    Returns:
+        pd.DataFrame: The DataFrame produced by `create_holiday_df`.
+
+    Examples:
+        >>> from spotforecast2.data.fetch_data import fetch_holiday_data
+        >>> holiday_df = fetch_holiday_data(
+        ...     start='2023-01-01T00:00',
+        ...     end='2023-01-10T00:00',
+        ...     tz='UTC',
+        ...     freq='h',
+        ...     country_code='DE',
+        ...     state='NW'
+        ... )
+        >>> holiday_df.head()
+    """
+    return create_holiday_df(
+        start=start,
+        end=end,
+        tz=tz,
+        freq=freq,
+        country_code=country_code,
+        state=state,
+    )
+def fetch_weather_data(
+    cov_start: str,
+    cov_end: str,
+    latitude: float = 51.5136,
+    longitude: float = 7.4653,
+    timezone: str = "UTC",
+    freq: str = "h",
+    fallback_on_failure: bool = True,
+    cached: bool = True,
+) -> pd.DataFrame:
+    """Fetch weather data for the covariate period via `WeatherService`.
+
+    A `WeatherService` is created for the given coordinates and asked for a
+    DataFrame covering ``cov_start`` to ``cov_end``. When ``cached`` is true
+    the service is given the cache file ``weather_cache.parquet`` inside the
+    directory returned by `get_data_home()`; otherwise no cache path is used.
+
+    Args:
+        cov_start (str):
+            Start date for covariate data.
+        cov_end (str):
+            End date for covariate data.
+        latitude (float):
+            Latitude of the location for weather data. Default is 51.5136 (Dortmund).
+        longitude (float):
+            Longitude of the location for weather data. Default is 7.4653 (Dortmund).
+        timezone (str):
+            Timezone for the weather data.
+        freq (str):
+            Frequency of the weather data.
+        fallback_on_failure (bool):
+            Whether to use fallback data in case of failure (forwarded to
+            `WeatherService.get_dataframe`).
+        cached (bool):
+            Whether to pass a cache file path to `WeatherService`. Note that
+            resolving the path calls `get_data_home()`, which creates the
+            data home directory if it is missing. Default is True.
+
+    Returns:
+        pd.DataFrame: DataFrame containing weather information.
+
+    Examples:
+        >>> from spotforecast2.data.fetch_data import fetch_weather_data
+        >>> weather_df = fetch_weather_data(
+        ...     cov_start='2023-01-01T00:00',
+        ...     cov_end='2023-01-11T00:00',
+        ...     latitude=51.5136,
+        ...     longitude=7.4653,
+        ...     timezone='UTC',
+        ...     freq='h',
+        ...     fallback_on_failure=True,
+        ...     cached=True
+        ... )
+        >>> weather_df.head()
+    """
+    # The cache location is only resolved when caching is requested, so an
+    # uncached call never touches the data home directory.
+    cache_path = get_data_home() / "weather_cache.parquet" if cached else None
+    service = WeatherService(
+        latitude=latitude, longitude=longitude, cache_path=cache_path
+    )
+    return service.get_dataframe(
+        start=cov_start,
+        end=cov_end,
+        timezone=timezone,
+        freq=freq,
+        fallback_on_failure=fallback_on_failure,
+    )