PyPI - tabpfn-time-series - Versions diffs - 0.1.2__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

tabpfn-time-series 0.1.2py3-none-any.whl → 1.0.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

tabpfn_time_series/__init__.py +3 -2
tabpfn_time_series/features/__init__.py +17 -0
tabpfn_time_series/features/auto_features.py +307 -0
tabpfn_time_series/features/basic_features.py +88 -0
tabpfn_time_series/features/feature_generator_base.py +21 -0
tabpfn_time_series/features/feature_transformer.py +53 -0
tabpfn_time_series/predictor.py +5 -7
tabpfn_time_series/tabpfn_worker.py +119 -17
{tabpfn_time_series-0.1.2.dist-info → tabpfn_time_series-1.0.0.dist-info}/METADATA +30 -14
tabpfn_time_series-1.0.0.dist-info/RECORD +15 -0
tabpfn_time_series/feature.py +0 -78
tabpfn_time_series-0.1.2.dist-info/RECORD +0 -11
{tabpfn_time_series-0.1.2.dist-info → tabpfn_time_series-1.0.0.dist-info}/WHEEL +0 -0
{tabpfn_time_series-0.1.2.dist-info → tabpfn_time_series-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0

tabpfn_time_series/__init__.py CHANGED Viewed

@@ -1,11 +1,12 @@
-from .feature import DefaultFeatures, FeatureTransformer
+from .features import FeatureTransformer
 from .predictor import TabPFNTimeSeriesPredictor, TabPFNMode
+from .defaults import TABPFN_TS_DEFAULT_QUANTILE_CONFIG
 __version__ = "0.1.0"
 __all__ = [
-    "DefaultFeatures",
     "FeatureTransformer",
     "TabPFNTimeSeriesPredictor",
     "TabPFNMode",
+    "TABPFN_TS_DEFAULT_QUANTILE_CONFIG",
 ]

tabpfn_time_series/features/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+from .basic_features import (
+    RunningIndexFeature,
+    CalendarFeature,
+    AdditionalCalendarFeature,
+    PeriodicSinCosineFeature,
+)
+from .auto_features import AutoSeasonalFeature
+from .feature_transformer import FeatureTransformer
+__all__ = [
+    "RunningIndexFeature",
+    "CalendarFeature",
+    "AdditionalCalendarFeature",
+    "AutoSeasonalFeature",
+    "PeriodicSinCosineFeature",
+    "FeatureTransformer",
+]

tabpfn_time_series/features/auto_features.py ADDED Viewed

@@ -0,0 +1,307 @@
+import numpy as np
+import pandas as pd
+from typing import List, Optional, Tuple, Literal
+import logging
+from scipy import fft
+from scipy.signal import find_peaks
+from statsmodels.tsa.stattools import acf
+from tabpfn_time_series.features.feature_generator_base import (
+    FeatureGenerator,
+)
+from tabpfn_time_series.features.basic_features import (
+    PeriodicSinCosineFeature,
+)
+logger = logging.getLogger(__name__)
+class AutoSeasonalFeature(FeatureGenerator):
+    """
+    Feature generator that detects dominant seasonal periods in the `target`
+    column with an FFT and adds one sine/cosine pair per detected period.
+    Output columns are always named `sin_#0..sin_#{max_top_k-1}` and
+    `cos_#0..cos_#{max_top_k-1}`; slots without a detected period are filled with 0.0
+    so that every time series ends up with the same set of columns.
+    """
+    class Config:
+        # Defaults used by `generate`. Note that max_top_k, detrend_type and
+        # exclude_zero differ from the keyword defaults of `find_seasonal_periods`.
+        max_top_k: int = 5
+        do_detrend: bool = True
+        detrend_type: Literal["first_diff", "loess", "linear", "constant"] = "linear"
+        use_peaks_only: bool = True
+        apply_hann_window: bool = True
+        zero_padding_factor: int = 2
+        round_to_closest_integer: bool = True
+        validate_with_acf: bool = False
+        sampling_interval: float = 1.0
+        magnitude_threshold: Optional[float] = 0.05
+        relative_threshold: bool = True
+        exclude_zero: bool = True
+    def __init__(self, config: Optional[dict] = None):
+        """
+        Parameters:
+        - config: Optional[dict]
+            Overrides for the defaults declared in `Config`. The merged dict is
+            forwarded as keyword arguments to `find_seasonal_periods`.
+        """
+        # Create default config from Config class
+        default_config = {
+            k: v for k, v in vars(self.Config).items() if not k.startswith("__")
+        }
+        # Initialize config with defaults
+        self.config = default_config.copy()
+        # Update with user-provided config if any
+        if config is not None:
+            self.config.update(config)
+        # Validate config parameters
+        self._validate_config()
+        logger.debug(f"Initialized AutoSeasonalFeature with config: {self.config}")
+    def _validate_config(self):
+        """Validate configuration parameters, replacing invalid values (with a warning)"""
+        if self.config["max_top_k"] < 1:
+            logger.warning("max_top_k must be at least 1, setting to 1")
+            self.config["max_top_k"] = 1
+        if self.config["zero_padding_factor"] < 1:
+            logger.warning("zero_padding_factor must be at least 1, setting to 1")
+            self.config["zero_padding_factor"] = 1
+        if self.config["detrend_type"] not in [
+            "first_diff",
+            "loess",
+            "linear",
+            "constant",
+        ]:
+            logger.warning(
+                f"Invalid detrend_type: {self.config['detrend_type']}, using 'linear'"
+            )
+            self.config["detrend_type"] = "linear"
+    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Return a copy of `df` with `sin_#i` / `cos_#i` columns for i in [0, max_top_k).
+        Periods are detected from `df.target`; NaN targets are ignored during detection.
+        """
+        df = df.copy()
+        # Detect seasonal periods from target data
+        detected_periods_and_magnitudes = self.find_seasonal_periods(
+            df.target, **self.config
+        )
+        logger.debug(
+            f"Found {len(detected_periods_and_magnitudes)} seasonal periods: {detected_periods_and_magnitudes}"
+        )
+        # Extract just the periods (without magnitudes)
+        periods = [period for period, _ in detected_periods_and_magnitudes]
+        # Generate features for detected periods using PeriodicSinCosineFeature
+        if periods:
+            feature_generator = PeriodicSinCosineFeature(periods=periods)
+            df = feature_generator.generate(df)
+        # Standardize column names for consistency across time series
+        renamed_columns = {}
+        for i, period in enumerate(periods):
+            renamed_columns[f"sin_{period}"] = f"sin_#{i}"
+            renamed_columns[f"cos_{period}"] = f"cos_#{i}"
+        df = df.rename(columns=renamed_columns)
+        # Add placeholder zero columns for missing periods up to max_top_k
+        for i in range(len(periods), self.config["max_top_k"]):
+            df[f"sin_#{i}"] = 0.0
+            df[f"cos_#{i}"] = 0.0
+        return df
+    @staticmethod
+    def find_seasonal_periods(
+        target_values: pd.Series,
+        max_top_k: int = 10,
+        do_detrend: bool = True,
+        detrend_type: Literal[
+            "first_diff", "loess", "linear", "constant"
+        ] = "first_diff",
+        use_peaks_only: bool = True,
+        apply_hann_window: bool = True,
+        zero_padding_factor: int = 2,
+        round_to_closest_integer: bool = True,
+        validate_with_acf: bool = False,
+        sampling_interval: float = 1.0,
+        magnitude_threshold: Optional[
+            float
+        ] = 0.05,  # Default relative threshold (5% of max)
+        relative_threshold: bool = True,  # Interpret threshold as a fraction of max FFT magnitude
+        exclude_zero: bool = False,
+    ) -> List[Tuple[float, float]]:
+        """
+        Identify dominant seasonal periods in a time series using FFT.
+        Parameters:
+        - target_values: pd.Series
+            Input time series data. NaN entries are dropped before the analysis.
+        - max_top_k: int
+            Maximum number of dominant periods to return.
+        - do_detrend: bool
+            If True, detrend the signal with the method given by `detrend_type`.
+        - detrend_type: str
+            One of "first_diff", "loess", "linear" or "constant"; only used when do_detrend is True.
+        - use_peaks_only: bool
+            If True, consider only local peaks in the FFT magnitude spectrum.
+        - apply_hann_window: bool
+            If True, apply a Hann window to reduce spectral leakage.
+        - zero_padding_factor: int
+            Factor by which to zero-pad the signal for finer frequency resolution.
+        - round_to_closest_integer: bool
+            If True, round the detected periods to the nearest integer.
+        - validate_with_acf: bool
+            If True, validate detected periods against the autocorrelation function.
+            If no period passes the validation, the unvalidated periods are returned.
+        - sampling_interval: float
+            Time interval between consecutive samples.
+        - magnitude_threshold: Optional[float]
+            Threshold to filter out less significant frequency components.
+            Default is 0.05, interpreted as 5% of the maximum FFT magnitude if relative_threshold is True.
+        - relative_threshold: bool
+            If True, the `magnitude_threshold` is interpreted as a fraction of the maximum FFT magnitude.
+            Otherwise, it is treated as an absolute threshold value.
+        - exclude_zero: bool
+            If True, exclude periods of 0 from the results.
+        Returns:
+        - List[Tuple[float, float]]:
+            A list of (period, magnitude) tuples, sorted in descending order by magnitude.
+            Empty if the series contains no non-NaN values.
+        """
+        # Convert the Pandas Series to a NumPy array
+        values = np.array(target_values, dtype=float)
+        # Quick hack to ignore the test_X
+        #   (Assuming train_X target is not NaN, and test_X target is NaN)
+        #   Dropping all the NaN values
+        values = values[~np.isnan(values)]
+        N_original = len(values)
+        # Nothing to analyse: the FFT below cannot be computed on an empty signal
+        if N_original == 0:
+            return []
+        # Detrend the signal with the requested method
+        if do_detrend:
+            values = detrend(values, detrend_type)
+        # Keep the NaN-free, detrended, un-windowed signal for the optional ACF validation
+        acf_input = values
+        # Apply a Hann window to reduce spectral leakage
+        if apply_hann_window:
+            window = np.hanning(N_original)
+            values = values * window
+        # Zero-pad the signal for improved frequency resolution
+        if zero_padding_factor > 1:
+            padded_length = int(N_original * zero_padding_factor)
+            padded_values = np.zeros(padded_length)
+            padded_values[:N_original] = values
+            values = padded_values
+            N = padded_length
+        else:
+            N = N_original
+        # Compute the FFT (using rfft) and obtain frequency bins
+        fft_values = fft.rfft(values)
+        fft_magnitudes = np.abs(fft_values)
+        freqs = np.fft.rfftfreq(N, d=sampling_interval)
+        # Exclude the DC component (0 Hz) to avoid bias from the signal's mean
+        fft_magnitudes[0] = 0.0
+        # Determine the threshold (absolute value)
+        if magnitude_threshold is not None and relative_threshold:
+            threshold_value = magnitude_threshold * np.max(fft_magnitudes)
+        else:
+            threshold_value = magnitude_threshold
+        # Identify dominant frequencies
+        if use_peaks_only:
+            if threshold_value is not None:
+                peak_indices, _ = find_peaks(fft_magnitudes, height=threshold_value)
+            else:
+                peak_indices, _ = find_peaks(fft_magnitudes)
+            if len(peak_indices) == 0:
+                # Fallback to considering all frequency bins if no peaks are found
+                peak_indices = np.arange(len(fft_magnitudes))
+            # Sort the peak indices by magnitude in descending order
+            sorted_peak_indices = peak_indices[
+                np.argsort(fft_magnitudes[peak_indices])[::-1]
+            ]
+            top_indices = sorted_peak_indices[:max_top_k]
+        else:
+            sorted_indices = np.argsort(fft_magnitudes)[::-1]
+            if threshold_value is not None:
+                # Filter with a boolean mask so that the indices stay an ndarray;
+                # the masking steps further below do not work on a Python list
+                sorted_indices = sorted_indices[
+                    fft_magnitudes[sorted_indices] >= threshold_value
+                ]
+            top_indices = sorted_indices[:max_top_k]
+        # Convert frequencies to periods (avoiding division by zero)
+        periods = np.zeros_like(freqs)
+        non_zero = freqs > 0
+        periods[non_zero] = 1.0 / freqs[non_zero]
+        top_periods = periods[top_indices]
+        logger.debug(f"Top periods: {top_periods}")
+        # Optionally round the periods to the nearest integer
+        if round_to_closest_integer:
+            top_periods = np.round(top_periods)
+        # Filter out zero periods if requested
+        if exclude_zero:
+            non_zero_mask = top_periods != 0
+            top_periods = top_periods[non_zero_mask]
+            top_indices = top_indices[non_zero_mask]
+        # Keep unique periods only
+        #   (np.unique reports the first occurrence, i.e. the strongest bin of each period,
+        #    because the candidates are ordered by descending magnitude)
+        if len(top_periods) > 0:
+            unique_period_indices = np.unique(top_periods, return_index=True)[1]
+            top_periods = top_periods[unique_period_indices]
+            top_indices = top_indices[unique_period_indices]
+        # Pair each period with its corresponding magnitude
+        results = list(zip(top_periods, fft_magnitudes[top_indices]))
+        # Validate with ACF if requested and filter the results accordingly
+        if validate_with_acf:
+            # Compute ACF on the original (non-padded) detrended signal
+            acf_values = acf(
+                acf_input,
+                nlags=N_original,
+                fft=True,
+            )
+            acf_peak_indices, _ = find_peaks(
+                acf_values, height=1.96 / np.sqrt(N_original)
+            )
+            validated_results = []
+            for period, mag in results:
+                period_int = int(round(period))
+                if period_int < len(acf_values) and any(
+                    abs(period_int - peak) <= 1 for peak in acf_peak_indices
+                ):
+                    validated_results.append((period, mag))
+            if validated_results:
+                results = validated_results
+        # Ensure the final results are sorted in descending order by magnitude
+        results.sort(key=lambda x: x[1], reverse=True)
+        return results
+def detrend(
+    x: np.ndarray,
+    detrend_type: Literal["first_diff", "loess", "linear", "constant"],
+) -> np.ndarray:
+    """
+    Remove the trend from a 1-D signal.
+    Parameters:
+    - x: np.ndarray
+        Signal to detrend.
+    - detrend_type: str
+        "first_diff": first differences; the output keeps the input length and starts with 0.
+        "loess": subtract a LOWESS fit (frac=0.1) computed over the sample index.
+        "linear" / "constant": delegate to scipy.signal.detrend with the same `type`.
+    Returns:
+    - np.ndarray: the detrended signal (an empty copy for empty input).
+    Raises:
+    - ValueError: if `detrend_type` is not one of the supported methods.
+    """
+    if detrend_type not in ("first_diff", "loess", "linear", "constant"):
+        raise ValueError(f"Invalid detrend method: {detrend_type}")
+    if len(x) == 0:
+        # Nothing to detrend; `x[0]` below would raise IndexError on empty input
+        return np.array(x, copy=True)
+    if detrend_type == "first_diff":
+        # Prepending x[0] keeps the length unchanged (first difference is 0)
+        return np.diff(x, prepend=x[0])
+    if detrend_type == "loess":
+        from statsmodels.api import nonparametric
+        indices = np.arange(len(x))
+        lowess = nonparametric.lowess(x, indices, frac=0.1)
+        trend = lowess[:, 1]
+        return x - trend
+    from scipy.signal import detrend as scipy_detrend
+    return scipy_detrend(x, type=detrend_type)

tabpfn_time_series/features/basic_features.py ADDED Viewed

@@ -0,0 +1,88 @@
+import numpy as np
+import pandas as pd
+from typing import List, Dict, Optional
+import gluonts.time_feature
+from tabpfn_time_series.features.feature_generator_base import (
+    FeatureGenerator,
+)
+class RunningIndexFeature(FeatureGenerator):
+    """Add a `running_index` column numbering the rows 0, 1, ..., len(df) - 1."""
+    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Return a copy of `df` with the `running_index` column set."""
+        n_rows = len(df)
+        out = df.copy()
+        out["running_index"] = range(n_rows)
+        return out
+class CalendarFeature(FeatureGenerator):
+    """
+    Add calendar components (e.g. `year`) and sine/cosine encodings of calendar
+    indices derived from the `timestamp` index level.
+    """
+    def __init__(
+        self,
+        components: Optional[List[str]] = None,
+        seasonal_features: Optional[Dict[str, List[float]]] = None,
+    ):
+        """
+        Parameters:
+        - components: attribute names read from the timestamp index; defaults to ["year"].
+        - seasonal_features: maps a feature name (resolved as
+            `gluonts.time_feature.<name>_index`) to its list of periods. A value of None
+            adds the raw index as a column instead of a sine/cosine encoding.
+        """
+        self.components = components or ["year"]
+        self.seasonal_features = seasonal_features or {
+            # (feature, natural seasonality)
+            "second_of_minute": [60],
+            "minute_of_hour": [60],
+            "hour_of_day": [24],
+            "day_of_week": [7],
+            "day_of_month": [30.5],
+            "day_of_year": [365],
+            "week_of_year": [52],
+            "month_of_year": [12],
+        }
+    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Return a copy of `df` with the calendar columns added.
+        A feature with a single period yields `<name>_sin` / `<name>_cos`; a feature
+        with several periods yields `<name>_sin_<period>` / `<name>_cos_<period>` per period.
+        """
+        df = df.copy()
+        timestamps = df.index.get_level_values("timestamp")
+        # Add basic calendar components
+        for component in self.components:
+            df[component] = getattr(timestamps, component)
+        # Add seasonal features
+        for feature_name, periods in self.seasonal_features.items():
+            feature_func = getattr(gluonts.time_feature, f"{feature_name}_index")
+            feature = feature_func(timestamps).astype(np.int32)
+            if periods is None:
+                df[feature_name] = feature
+                continue
+            # With several periods an unsuffixed name would be overwritten on every
+            # iteration, leaving only the last period; the single-period names stay as before
+            needs_suffix = len(periods) > 1
+            for period in periods:
+                suffix = f"_{period}" if needs_suffix else ""
+                period = period - 1  # Adjust for 0-based indexing
+                df[f"{feature_name}_sin{suffix}"] = np.sin(2 * np.pi * feature / period)
+                df[f"{feature_name}_cos{suffix}"] = np.cos(2 * np.pi * feature / period)
+        return df
+class AdditionalCalendarFeature(CalendarFeature):
+    """CalendarFeature whose default seasonal features are extended with extra ones."""
+    def __init__(
+        self,
+        components: Optional[List[str]] = None,
+        additional_seasonal_features: Optional[Dict[str, List[float]]] = None,
+    ):
+        """
+        Parameters:
+        - components: forwarded to CalendarFeature.
+        - additional_seasonal_features: extra feature-name -> periods entries. On a key
+            collision the CalendarFeature default wins, because the defaults are merged last.
+            None (the default) adds nothing.
+        """
+        super().__init__(components=components)
+        # `**None` raises TypeError, so fall back to an empty mapping
+        self.seasonal_features = {
+            **(additional_seasonal_features or {}),
+            **self.seasonal_features,
+        }
+class PeriodicSinCosineFeature(FeatureGenerator):
+    """Add a sine and a cosine column over the row position for each given period."""
+    def __init__(self, periods: List[float], name_suffix: str = None):
+        self.periods = periods
+        self.name_suffix = name_suffix
+    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Return a copy of `df` with `sin_<label>` / `cos_<label>` columns, where the label
+        is `<name_suffix>_<i>` when a name suffix is set and the period itself otherwise.
+        """
+        out = df.copy()
+        for idx, period in enumerate(self.periods):
+            if self.name_suffix:
+                label = f"{self.name_suffix}_{idx}"
+            else:
+                label = f"{period}"
+            # Row positions 0..len-1 scaled to one full turn per `period` rows
+            phase = 2 * np.pi * np.arange(len(out)) / period
+            out[f"sin_{label}"] = np.sin(phase)
+            out[f"cos_{label}"] = np.cos(phase)
+        return out

tabpfn_time_series/features/feature_generator_base.py ADDED Viewed

@@ -0,0 +1,21 @@
+from abc import ABC, abstractmethod
+import pandas as pd
+class FeatureGenerator(ABC):
+    """Abstract interface for feature generators; subclasses implement `generate`"""
+    @abstractmethod
+    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Generate features for the given dataframe (must be overridden)"""
+    def __call__(self, df: pd.DataFrame) -> pd.DataFrame:
+        # Calling the instance is shorthand for generate()
+        return self.generate(df)
+    def __str__(self) -> str:
+        # "<ClassName>_<instance attribute dict>"
+        cls_name = type(self).__name__
+        attributes = vars(self)
+        return f"{cls_name}_{attributes}"
+    def __repr__(self) -> str:
+        return str(self)

tabpfn_time_series/features/feature_transformer.py ADDED Viewed

@@ -0,0 +1,53 @@
+from typing import List, Tuple
+import pandas as pd
+from autogluon.timeseries import TimeSeriesDataFrame
+from tabpfn_time_series.features.feature_generator_base import (
+    FeatureGenerator,
+)
+class FeatureTransformer:
+    """Apply a list of feature generators to train and test data in one pass."""
+    def __init__(self, feature_generators: List[FeatureGenerator]):
+        self.feature_generators = feature_generators
+    def transform(
+        self,
+        train_tsdf: TimeSeriesDataFrame,
+        test_tsdf: TimeSeriesDataFrame,
+        target_column: str = "target",
+    ) -> Tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]:
+        """
+        Run every generator per `item_id` on the concatenated train + test frame
+        and return the (train, test) parts again.
+        Raises ValueError on invalid input (see `_validate_input`) and AssertionError
+        if, after the split, train contains NaN targets or test contains non-NaN targets.
+        """
+        self._validate_input(train_tsdf, test_tsdf, target_column)
+        n_train_rows = len(train_tsdf)
+        combined = pd.concat([train_tsdf, test_tsdf])
+        # Apply all feature generators, one time series at a time
+        for generator in self.feature_generators:
+            per_item = combined.groupby(level="item_id", group_keys=False)
+            combined = per_item.apply(generator)
+        # Split positionally
+        #   NOTE(review): assumes the groupby-apply keeps the concatenated
+        #   (train rows first, then test rows) order -- the asserts below guard this
+        train_part = combined.iloc[:n_train_rows]
+        test_part = combined.iloc[n_train_rows:]
+        assert (
+            not train_part[target_column].isna().any()
+        ), "All target values in train_tsdf should be non-NaN"
+        assert test_part[target_column].isna().all()
+        return train_part, test_part
+    @staticmethod
+    def _validate_input(
+        train_tsdf: TimeSeriesDataFrame,
+        test_tsdf: TimeSeriesDataFrame,
+        target_column: str,
+    ):
+        """Raise ValueError if train lacks the target column or test has non-NaN targets."""
+        train_has_target_column = target_column in train_tsdf.columns
+        if not train_has_target_column:
+            raise ValueError(
+                f"Target column '{target_column}' not found in training data"
+            )
+        test_targets_all_missing = test_tsdf[target_column].isna().all()
+        if not test_targets_all_missing:
+            raise ValueError("Test data should not contain target values")

tabpfn_time_series/predictor.py CHANGED Viewed

@@ -3,11 +3,8 @@ from enum import Enum
 from autogluon.timeseries import TimeSeriesDataFrame
-from tabpfn_time_series.tabpfn_worker import TabPFNClient, LocalTabPFN
-from tabpfn_time_series.defaults import (
-    TABPFN_TS_DEFAULT_QUANTILE_CONFIG,
-    TABPFN_TS_DEFAULT_CONFIG,
-)
+from tabpfn_time_series.tabpfn_worker import TabPFNClient, LocalTabPFN, MockTabPFN
+from tabpfn_time_series.defaults import TABPFN_TS_DEFAULT_CONFIG
 logger = logging.getLogger(__name__)
@@ -15,6 +12,7 @@ logger = logging.getLogger(__name__)
 class TabPFNMode(Enum):
     LOCAL = "tabpfn-local"
     CLIENT = "tabpfn-client"
+    MOCK = "tabpfn-mock"
 class TabPFNTimeSeriesPredictor:
@@ -30,6 +28,7 @@ class TabPFNTimeSeriesPredictor:
         worker_mapping = {
             TabPFNMode.CLIENT: lambda: TabPFNClient(config),
             TabPFNMode.LOCAL: lambda: LocalTabPFN(config),
+            TabPFNMode.MOCK: lambda: MockTabPFN(config),
         }
         self.tabpfn_worker = worker_mapping[tabpfn_mode]()
@@ -37,7 +36,6 @@ class TabPFNTimeSeriesPredictor:
         self,
         train_tsdf: TimeSeriesDataFrame,  # with features and target
         test_tsdf: TimeSeriesDataFrame,  # with features only
-        quantile_config: list[float] = TABPFN_TS_DEFAULT_QUANTILE_CONFIG,
     ) -> TimeSeriesDataFrame:
         """
         Predict on each time series individually (local forecasting).
@@ -47,4 +45,4 @@ class TabPFNTimeSeriesPredictor:
             f"Predicting {len(train_tsdf.item_ids)} time series with config{self.tabpfn_worker.config}"
         )
-        return self.tabpfn_worker.predict(train_tsdf, test_tsdf, quantile_config)
+        return self.tabpfn_worker.predict(train_tsdf, test_tsdf)

tabpfn_time_series/tabpfn_worker.py CHANGED Viewed

@@ -2,8 +2,10 @@ import logging
 from abc import ABC, abstractmethod
 from joblib import Parallel, delayed
+from tqdm import tqdm
 import pandas as pd
 import numpy as np
+import torch
 from scipy.stats import norm
 from autogluon.timeseries import TimeSeriesDataFrame
@@ -26,14 +28,7 @@ class TabPFNWorker(ABC):
         self,
         train_tsdf: TimeSeriesDataFrame,
         test_tsdf: TimeSeriesDataFrame,
-        quantile_config: list[float],
     ):
-        if not set(quantile_config).issubset(set(TABPFN_TS_DEFAULT_QUANTILE_CONFIG)):
-            raise NotImplementedError(
-                f"We currently only supports {TABPFN_TS_DEFAULT_QUANTILE_CONFIG} for quantile prediction,"
-                f" but got {quantile_config}."
-            )
         predictions = Parallel(
             n_jobs=self.num_workers,
             backend="loky",
@@ -42,9 +37,8 @@ class TabPFNWorker(ABC):
                 item_id,
                 train_tsdf.loc[item_id],
                 test_tsdf.loc[item_id],
-                quantile_config,
             )
-            for item_id in train_tsdf.item_ids
+            for item_id in tqdm(train_tsdf.item_ids, desc="Predicting time series")
         )
         predictions = pd.concat(predictions)
@@ -59,8 +53,9 @@ class TabPFNWorker(ABC):
         item_id: str,
         single_train_tsdf: TimeSeriesDataFrame,
         single_test_tsdf: TimeSeriesDataFrame,
-        quantile_config: list[float],
     ) -> pd.DataFrame:
+        # logger.debug(f"Predicting on item_id: {item_id}")
         test_index = single_test_tsdf.index
         train_X, train_y = split_time_series_to_X_y(single_train_tsdf.copy())
         test_X, _ = split_time_series_to_X_y(single_test_tsdf.copy())
@@ -70,7 +65,7 @@ class TabPFNWorker(ABC):
         if train_y_has_constant_value:
             logger.info("Found time-series with constant target")
             result = self._predict_on_constant_train_target(
-                single_train_tsdf, single_test_tsdf, quantile_config
+                single_train_tsdf, single_test_tsdf
             )
         else:
             tabpfn = self._get_tabpfn_engine()
@@ -81,7 +76,9 @@ class TabPFNWorker(ABC):
             result.update(
                 {
                     q: q_pred
-                    for q, q_pred in zip(quantile_config, full_pred["quantiles"])
+                    for q, q_pred in zip(
+                        TABPFN_TS_DEFAULT_QUANTILE_CONFIG, full_pred["quantiles"]
+                    )
                 }
             )
@@ -98,7 +95,6 @@ class TabPFNWorker(ABC):
         self,
         single_train_tsdf: TimeSeriesDataFrame,
         single_test_tsdf: TimeSeriesDataFrame,
-        quantile_config: list[float],
     ) -> pd.DataFrame:
         # If train_y is constant, we return the constant value from the training set
         mean_constant = single_train_tsdf.target.iloc[0]
@@ -106,12 +102,14 @@ class TabPFNWorker(ABC):
         # For quantile prediction, we assume that the uncertainty follows a standard normal distribution
         quantile_pred_with_uncertainty = norm.ppf(
-            quantile_config, loc=mean_constant, scale=1
+            TABPFN_TS_DEFAULT_QUANTILE_CONFIG, loc=mean_constant, scale=1
         )
         result.update(
             {
                 q: np.full(len(single_test_tsdf), v)
-                for q, v in zip(quantile_config, quantile_pred_with_uncertainty)
+                for q, v in zip(
+                    TABPFN_TS_DEFAULT_QUANTILE_CONFIG, quantile_pred_with_uncertainty
+                )
             }
         )
@@ -141,8 +139,52 @@ class LocalTabPFN(TabPFNWorker):
     def __init__(
         self,
         config: dict = {},
+        num_workers_per_gpu: int = 4,  # per GPU
+    ):
+        self.num_workers_per_gpu = num_workers_per_gpu
+        # Only support GPU for now (inference on CPU takes too long)
+        if not torch.cuda.is_available():
+            raise ValueError("GPU is required for local TabPFN inference")
+        super().__init__(
+            config, num_workers=torch.cuda.device_count() * self.num_workers_per_gpu
+        )
+    def predict(
+        self,
+        train_tsdf: TimeSeriesDataFrame,
+        test_tsdf: TimeSeriesDataFrame,
     ):
-        super().__init__(config, num_workers=1)
+        total_num_workers = torch.cuda.device_count() * self.num_workers_per_gpu
+        # Split data into chunks for parallel inference on each GPU
+        #   since the time series are of different lengths, we shuffle
+        #   the item_ids s.t. the workload is distributed evenly across GPUs
+        # Also, using 'min' since num_workers could be larger than the number of time series
+        np.random.seed(0)
+        item_ids_chunks = np.array_split(
+            np.random.permutation(train_tsdf.item_ids),
+            min(total_num_workers, len(train_tsdf.item_ids)),
+        )
+        # Run predictions in parallel
+        predictions = Parallel(n_jobs=len(item_ids_chunks), backend="loky")(
+            delayed(self._prediction_routine_per_gpu)(
+                train_tsdf.loc[chunk],
+                test_tsdf.loc[chunk],
+                gpu_id=i
+                % torch.cuda.device_count(),  # Alternate between available GPUs
+            )
+            for i, chunk in enumerate(item_ids_chunks)
+        )
+        predictions = pd.concat(predictions)
+        # Sort predictions according to original item_ids order
+        predictions = predictions.loc[train_tsdf.item_ids]
+        return TimeSeriesDataFrame(predictions)
     def _get_tabpfn_engine(self):
         from tabpfn import TabPFNRegressor
@@ -151,7 +193,67 @@ class LocalTabPFN(TabPFNWorker):
             config = self.config["tabpfn_internal"].copy()
             config["model_path"] = self._parse_model_path(config["model_path"])
-        return TabPFNRegressor(**config)
+        return TabPFNRegressor(**config, random_state=0)
     def _parse_model_path(self, model_name: str) -> str:
         return f"tabpfn-v2-regressor-{model_name}.ckpt"
+    def _prediction_routine_per_gpu(
+        self,
+        train_tsdf: TimeSeriesDataFrame,
+        test_tsdf: TimeSeriesDataFrame,
+        gpu_id: int,
+    ):
+        """
+        Predict all time series of the given chunk one after another on CUDA device
+        `gpu_id` and return the per-series predictions concatenated into one frame.
+        """
+        # Select the device for this worker
+        torch.cuda.set_device(gpu_id)
+        progress = tqdm(train_tsdf.item_ids, desc=f"GPU {gpu_id}:")
+        per_item_predictions = [
+            self._prediction_routine(
+                item_id,
+                train_tsdf.loc[item_id],
+                test_tsdf.loc[item_id],
+            )
+            for item_id in progress
+        ]
+        # Release cached GPU memory once the chunk is done
+        torch.cuda.empty_cache()
+        return pd.concat(per_item_predictions)
+class MockTabPFN(TabPFNWorker):
+    """
+    Stand-in TabPFN worker whose engine produces random predictions.
+    Useful for tests and debugging.
+    """
+    class MockTabPFNRegressor:
+        """Regressor stub: `fit` does nothing and `predict` draws random numbers."""
+        TABPFN_QUANTILE = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+        def __init__(self, *args, **kwargs):
+            """Accept and ignore any constructor arguments."""
+        def fit(self, *args, **kwargs):
+            """Accept and ignore any training data."""
+        def predict(self, test_X, output_type="main", **kwargs):
+            """Return random `mean`, `median`, `mode` and one `quantiles` array per quantile."""
+            if output_type == "main":
+                n_rows = len(test_X)
+                # Draw order (mean, median, mode, quantiles) matters for seeded runs
+                prediction = {
+                    key: np.random.rand(n_rows) for key in ("mean", "median", "mode")
+                }
+                prediction["quantiles"] = [
+                    np.random.rand(n_rows) for _ in self.TABPFN_QUANTILE
+                ]
+                return prediction
+            raise NotImplementedError(
+                "Only main output is supported for mock TabPFN"
+            )
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+    def _get_tabpfn_engine(self):
+        engine = self.MockTabPFNRegressor()
+        return engine

{tabpfn_time_series-0.1.2.dist-info → tabpfn_time_series-1.0.0.dist-info}/METADATA RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: tabpfn_time_series
-Version: 0.1.2
-Summary: Zero-shot time series forecasting with TabPFN
+Version: 1.0.0
+Summary: Zero-shot time series forecasting with TabPFNv2
 Project-URL: Homepage, https://github.com/liam-sbhoo/tabpfn-time-series
 Project-URL: Bug Tracker, https://github.com/liam-sbhoo/tabpfn-time-series/issues
 Author-email: Liam Shi Bin Hoo <hoos@tf.uni-freiburg.de>
@@ -10,28 +10,43 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Requires-Python: >=3.10
-Requires-Dist: autogluon-timeseries==1.2
-Requires-Dist: gluonts==0.16.0
-Requires-Dist: pandas
-Requires-Dist: tabpfn-client==0.1.1
-Requires-Dist: tabpfn==2.0.0
+Requires-Dist: autogluon-timeseries>=1.2
+Requires-Dist: datasets>=3.3.2
+Requires-Dist: gluonts>=0.16.0
+Requires-Dist: pandas<2.2.0,>=2.1.2
+Requires-Dist: python-dotenv>=1.1.0
+Requires-Dist: pyyaml>=6.0.1
+Requires-Dist: tabpfn-client>=0.1.7
+Requires-Dist: tabpfn>=2.0.9
 Requires-Dist: tqdm
 Provides-Extra: dev
 Requires-Dist: build; extra == 'dev'
 Requires-Dist: jupyter; extra == 'dev'
 Requires-Dist: pre-commit; extra == 'dev'
 Requires-Dist: ruff; extra == 'dev'
+Requires-Dist: submitit>=1.5.2; extra == 'dev'
 Requires-Dist: twine; extra == 'dev'
+Requires-Dist: wandb>=0.19.8; extra == 'dev'
 Description-Content-Type: text/markdown
-# Time Series Forecasting with TabPFN
+# TabPFN-TS
+> Zero-Shot Time Series Forecasting with TabPFNv2
+[![PyPI version](https://badge.fury.io/py/tabpfn-time-series.svg)](https://badge.fury.io/py/tabpfn-time-series)
 [![colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liam-sbhoo/tabpfn-time-series/blob/main/demo.ipynb)
 [![Discord](https://img.shields.io/discord/1285598202732482621?color=7289da&label=Discord&logo=discord&logoColor=ffffff)](https://discord.com/channels/1285598202732482621/)
-[![arXiv](https://img.shields.io/badge/arXiv-2501.02945-<COLOR>.svg)](https://arxiv.org/abs/2501.02945)
+[![arXiv](https://img.shields.io/badge/arXiv-2501.02945-<COLOR>.svg)](https://arxiv.org/abs/2501.02945v3)
+## 📌 News
+- **27-05-2025**: 📝 New **[paper](https://arxiv.org/abs/2501.02945v3)** version and **v1.0.0** release! Strong [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) results, new AutoSeasonalFeatures, improved CalendarFeatures.
+- **27-01-2025**: 🚀 Ranked _**1st**_ on [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark<sup>[1]</sup>!
+- **10-10-2024**: 🚀 TabPFN-TS [paper](https://arxiv.org/abs/2501.02945v2) accepted to NeurIPS 2024 [TRL](https://table-representation-learning.github.io/NeurIPS2024/) and [TSALM](https://neurips-time-series-workshop.github.io/) workshops!
+_[1] Last checked on: 10/03/2025_
-We demonstrate that the tabular foundation model **[TabPFN](https://github.com/PriorLabs/TabPFN)**, when paired with minimal featurization, can perform zero-shot time series forecasting. Its performance on point forecasting matches or even slightly outperforms state-of-the-art methods.
+## ✨ Introduction
+We demonstrate that the tabular foundation model **[TabPFNv2](https://github.com/PriorLabs/TabPFN)**, combined with lightweight feature engineering, enables zero-shot time series forecasting for both point and probabilistic tasks. On the **[GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval)** benchmark, our method achieves performance on par with top-tier models across both evaluation metrics.
 ## 📖 How does it work?
@@ -41,18 +56,19 @@ Our work proposes to frame **univariate time series forecasting** as a **tabular
 Concretely, we:
 1. Transform a time series into a table
-2. Extract features from timestamp and add them to the table
-3. Perform regression on the table using TabPFN
+2. Extract features and add them to the table
+3. Perform regression on the table using TabPFNv2
 4. Use regression results as time series forecasting outputs
-For more details, please refer to our [paper](https://arxiv.org/abs/2501.02945) and our [poster](docs/tabpfn-ts-neurips-poster.pdf) (presented at NeurIPS 2024 TRL and TSALM workshops).
+For more details, please refer to our [paper](https://arxiv.org/abs/2501.02945v3).
+ <!-- and our [poster](docs/tabpfn-ts-neurips-poster.pdf) (presented at NeurIPS 2024 TRL and TSALM workshops). -->
 ## 👉 **Why give us a try?**
 - **Zero-shot forecasting**: this method is extremely fast and requires no training, making it highly accessible for experimenting with your own problems.
 - **Point and probabilistic forecasting**: it provides accurate point forecasts as well as probabilistic forecasts.
 - **Support for exogenous variables**: if you have exogenous variables, this method can seamlessly incorporate them into the forecasting model.
-On top of that, thanks to **[tabpfn-client](https://github.com/automl/tabpfn-client)** from **[Prior Labs](https://priorlabs.ai)**, you won’t even need your own GPU to run fast inference with TabPFN. 😉 We have included `tabpfn-client` as the default engine in our implementation.
+On top of that, thanks to **[tabpfn-client](https://github.com/automl/tabpfn-client)** from **[Prior Labs](https://priorlabs.ai)**, you won’t even need your own GPU to run fast inference with TabPFNv2. 😉 We have included `tabpfn-client` as the default engine in our implementation.
 ## How to use it?

tabpfn_time_series-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,15 @@
+tabpfn_time_series/__init__.py,sha256=3XGvQieVbONwhVtn1rITet6HNiTMWQTxHm2xLlGI5ew,314
+tabpfn_time_series/data_preparation.py,sha256=iNW7sAnRkTgmzzOEHBhkkTwm_lQ3p_Q9xgAQ5PbkOts,5416
+tabpfn_time_series/defaults.py,sha256=u2_JnwxiZ5NNibzyNpsE63KuP3TcmOL1iAP8llZ2rJk,238
+tabpfn_time_series/plot.py,sha256=bwSYcWBanzPrUxXKFsbqG8fyGsOJZfgU2v3NsxzTSXo,6571
+tabpfn_time_series/predictor.py,sha256=JzuV34zERf1XDLacGzSFJb-o077qd7GlKC6lvD62EPk,1457
+tabpfn_time_series/tabpfn_worker.py,sha256=zvFwg4Dc01_m5emqmVITBr6W_cNZ04tMyntmj40pyPE,8299
+tabpfn_time_series/features/__init__.py,sha256=lzdZWkEfntfg3ZHqNNbfbg-3o_VIzju0tebdRu3AzF4,421
+tabpfn_time_series/features/auto_features.py,sha256=3OqqY2h7umcoLjLx4hOXypLTjwzrMtd6cQKTNi83vrU,11561
+tabpfn_time_series/features/basic_features.py,sha256=OV3B__S30-CX88vGjwYQDWqAbJajQw80PxcnvJVUbm4,2955
+tabpfn_time_series/features/feature_generator_base.py,sha256=jtySWLJyX4E31v6CbX44EHa8cdz7OMyauf4ltNEQeAQ,534
+tabpfn_time_series/features/feature_transformer.py,sha256=mUsbnPUhJ4lPcnGWk8Ag1hgCOE1V5I0iQRT4VFgQEso,1763
+tabpfn_time_series-1.0.0.dist-info/METADATA,sha256=CvXqIOHNTKyd-zpCednsqa3FloPk6lFJ4ISG0eSEWx4,4434
+tabpfn_time_series-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+tabpfn_time_series-1.0.0.dist-info/licenses/LICENSE.txt,sha256=iwhPL7kIWQG6gyLZZwIMDItGrNgxMDIq9itxkUSMapY,11345
+tabpfn_time_series-1.0.0.dist-info/RECORD,,

tabpfn_time_series/feature.py DELETED Viewed

@@ -1,78 +0,0 @@
-import numpy as np
-import pandas as pd
-from typing import Tuple, List, Callable
-import gluonts.time_feature
-from autogluon.timeseries import TimeSeriesDataFrame
-class DefaultFeatures:
-    @staticmethod
-    def add_running_index(df: pd.DataFrame) -> pd.Series:
-        df["running_index"] = range(len(df))
-        return df
-    @staticmethod
-    def add_calendar_features(df: pd.DataFrame) -> pd.DataFrame:
-        CALENDAR_COMPONENT = [
-            "year",
-            # "month",
-            # "day",
-        ]
-        CALENDAR_FEATURES = [
-            # (feature, natural seasonality)
-            ("hour_of_day", 24),
-            ("day_of_week", 7),
-            ("day_of_month", 30.5),
-            ("day_of_year", 365),
-            ("week_of_year", 52),
-            ("month_of_year", 12),
-        ]
-        timestamps = df.index.get_level_values("timestamp")
-        for component_name in CALENDAR_COMPONENT:
-            df[component_name] = getattr(timestamps, component_name)
-        for feature_name, seasonality in CALENDAR_FEATURES:
-            feature_func = getattr(gluonts.time_feature, f"{feature_name}_index")
-            feature = feature_func(timestamps).astype(np.int32)
-            if seasonality is not None:
-                df[f"{feature_name}_sin"] = np.sin(
-                    2 * np.pi * feature / (seasonality - 1)
-                )  # seasonality - 1 because the value starts from 0
-                df[f"{feature_name}_cos"] = np.cos(
-                    2 * np.pi * feature / (seasonality - 1)
-                )
-            else:
-                df[feature_name] = feature
-        return df
-class FeatureTransformer:
-    @staticmethod
-    def add_features(
-        train_tsdf: TimeSeriesDataFrame,
-        test_tsdf: TimeSeriesDataFrame,
-        feature_generators: List[Callable[[TimeSeriesDataFrame], TimeSeriesDataFrame]],
-        target_column: str = "target",
-    ) -> Tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]:
-        assert target_column in train_tsdf.columns
-        assert test_tsdf[target_column].isna().all()
-        # Join train and test tsdf
-        tsdf = pd.concat([train_tsdf, test_tsdf])
-        # Apply feature generators
-        for func in feature_generators:
-            tsdf = tsdf.groupby(level="item_id", group_keys=False).apply(func)
-        # Split train and test tsdf
-        train_tsdf = tsdf.iloc[: len(train_tsdf)]
-        test_tsdf = tsdf.iloc[len(train_tsdf) :]
-        assert test_tsdf[target_column].isna().all()
-        return train_tsdf, test_tsdf

tabpfn_time_series-0.1.2.dist-info/RECORD DELETED Viewed

@@ -1,11 +0,0 @@
-tabpfn_time_series/__init__.py,sha256=5ruHrmKBQRIZ3WXLA8du4JKttF55ntnI74hkRsHThQ8,256
-tabpfn_time_series/data_preparation.py,sha256=iNW7sAnRkTgmzzOEHBhkkTwm_lQ3p_Q9xgAQ5PbkOts,5416
-tabpfn_time_series/defaults.py,sha256=u2_JnwxiZ5NNibzyNpsE63KuP3TcmOL1iAP8llZ2rJk,238
-tabpfn_time_series/feature.py,sha256=_9FxfQfgPOOO1MiT8hB8523eZ3Nc5oKuoY7vcohKZZc,2531
-tabpfn_time_series/plot.py,sha256=bwSYcWBanzPrUxXKFsbqG8fyGsOJZfgU2v3NsxzTSXo,6571
-tabpfn_time_series/predictor.py,sha256=W9JijaxFaR0chfiW7m4RuDQ0wrRcJezDWVwCBEOQDFk,1502
-tabpfn_time_series/tabpfn_worker.py,sha256=XNpqLEW51PgzrEopNNdtGdYArMCHT4yeBK3BS3z25K0,5021
-tabpfn_time_series-0.1.2.dist-info/METADATA,sha256=hO69b8GN3GDRIetG4DGtxpdMubc8sm8h_aI2RwEto2U,3285
-tabpfn_time_series-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-tabpfn_time_series-0.1.2.dist-info/licenses/LICENSE.txt,sha256=iwhPL7kIWQG6gyLZZwIMDItGrNgxMDIq9itxkUSMapY,11345
-tabpfn_time_series-0.1.2.dist-info/RECORD,,

{tabpfn_time_series-0.1.2.dist-info → tabpfn_time_series-1.0.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{tabpfn_time_series-0.1.2.dist-info → tabpfn_time_series-1.0.0.dist-info}/licenses/LICENSE.txt RENAMED Viewed

File without changes

tabpfn-time-series 0.1.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

tabpfn-time-series 0.1.2py3-none-any.whl → 1.0.0py3-none-any.whl