tabpfn-time-series 0.1.3__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,10 @@
1
- from .feature import DefaultFeatures, FeatureTransformer
1
+ from .features import FeatureTransformer
2
2
  from .predictor import TabPFNTimeSeriesPredictor, TabPFNMode
3
3
  from .defaults import TABPFN_TS_DEFAULT_QUANTILE_CONFIG
4
4
 
5
5
  __version__ = "0.1.0"
6
6
 
7
7
  __all__ = [
8
- "DefaultFeatures",
9
8
  "FeatureTransformer",
10
9
  "TabPFNTimeSeriesPredictor",
11
10
  "TabPFNMode",
@@ -0,0 +1,17 @@
1
+ from .basic_features import (
2
+ RunningIndexFeature,
3
+ CalendarFeature,
4
+ AdditionalCalendarFeature,
5
+ PeriodicSinCosineFeature,
6
+ )
7
+ from .auto_features import AutoSeasonalFeature
8
+ from .feature_transformer import FeatureTransformer
9
+
10
+ __all__ = [
11
+ "RunningIndexFeature",
12
+ "CalendarFeature",
13
+ "AdditionalCalendarFeature",
14
+ "AutoSeasonalFeature",
15
+ "PeriodicSinCosineFeature",
16
+ "FeatureTransformer",
17
+ ]
@@ -0,0 +1,307 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from typing import List, Optional, Tuple, Literal
4
+
5
+ import logging
6
+
7
+ from scipy import fft
8
+ from scipy.signal import find_peaks
9
+ from statsmodels.tsa.stattools import acf
10
+
11
+ from tabpfn_time_series.features.feature_generator_base import (
12
+ FeatureGenerator,
13
+ )
14
+ from tabpfn_time_series.features.basic_features import (
15
+ PeriodicSinCosineFeature,
16
+ )
17
+
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class AutoSeasonalFeature(FeatureGenerator):
    """Add sin/cos features for seasonal periods detected in ``df.target``.

    Periods are detected with an FFT-based search (see
    ``find_seasonal_periods``). Each detected period is encoded through
    ``PeriodicSinCosineFeature`` and the resulting columns are renamed to
    ``sin_#<i>`` / ``cos_#<i>``. Slots up to ``max_top_k`` with no detected
    period are filled with zero columns, so the set of output columns does not
    depend on how many periods were found.
    """

    class Config:
        # Defaults for the detector. Every non-dunder attribute of this class
        # becomes a key of ``self.config`` and is forwarded as a keyword
        # argument to ``find_seasonal_periods`` -- do not add attributes here
        # that are not parameters of that method.
        max_top_k: int = 5
        do_detrend: bool = True
        detrend_type: Literal["first_diff", "loess", "linear", "constant"] = "linear"
        use_peaks_only: bool = True
        apply_hann_window: bool = True
        zero_padding_factor: int = 2
        round_to_closest_integer: bool = True
        validate_with_acf: bool = False
        sampling_interval: float = 1.0
        magnitude_threshold: Optional[float] = 0.05
        relative_threshold: bool = True
        exclude_zero: bool = True

    def __init__(self, config: Optional[dict] = None):
        """Build the effective config from ``Config`` defaults plus overrides.

        Parameters:
        - config: Optional[dict]
            Keys override the defaults declared on ``Config``.
        """
        # Create default config from Config class
        default_config = {
            k: v for k, v in vars(self.Config).items() if not k.startswith("__")
        }

        # Initialize config with defaults
        self.config = default_config.copy()

        # Update with user-provided config if any
        if config is not None:
            self.config.update(config)

        # Validate config parameters
        self._validate_config()

        logger.debug(f"Initialized AutoSeasonalFeature with config: {self.config}")

    def _validate_config(self):
        """Validate configuration parameters, resetting invalid ones with a warning"""
        if self.config["max_top_k"] < 1:
            logger.warning("max_top_k must be at least 1, setting to 1")
            self.config["max_top_k"] = 1

        if self.config["zero_padding_factor"] < 1:
            logger.warning("zero_padding_factor must be at least 1, setting to 1")
            self.config["zero_padding_factor"] = 1

        if self.config["detrend_type"] not in [
            "first_diff",
            "loess",
            "linear",
            "constant",
        ]:
            logger.warning(
                f"Invalid detrend_type: {self.config['detrend_type']}, using 'linear'"
            )
            self.config["detrend_type"] = "linear"

    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with ``sin_#i`` / ``cos_#i`` seasonal columns.

        Parameters:
        - df: pd.DataFrame
            Frame with a ``target`` column; NaN targets are ignored by the
            period detection.

        Returns:
        - pd.DataFrame:
            Copy of ``df`` with ``2 * max_top_k`` additional columns.
        """
        df = df.copy()

        # Detect seasonal periods from target data
        detected_periods_and_magnitudes = self.find_seasonal_periods(
            df.target, **self.config
        )
        logger.debug(
            f"Found {len(detected_periods_and_magnitudes)} seasonal periods: {detected_periods_and_magnitudes}"
        )

        # Extract just the periods (without magnitudes)
        periods = [period for period, _ in detected_periods_and_magnitudes]

        # Generate features for detected periods using PeriodicSinCosineFeature
        if periods:
            feature_generator = PeriodicSinCosineFeature(periods=periods)
            df = feature_generator.generate(df)

            # Standardize column names for consistency across time series
            renamed_columns = {}
            for i, period in enumerate(periods):
                renamed_columns[f"sin_{period}"] = f"sin_#{i}"
                renamed_columns[f"cos_{period}"] = f"cos_#{i}"

            df = df.rename(columns=renamed_columns)

        # Add placeholder zero columns for missing periods up to max_top_k
        for i in range(len(periods), self.config["max_top_k"]):
            df[f"sin_#{i}"] = 0.0
            df[f"cos_#{i}"] = 0.0

        return df

    @staticmethod
    def find_seasonal_periods(
        target_values: pd.Series,
        max_top_k: int = 10,
        do_detrend: bool = True,
        detrend_type: Literal[
            "first_diff", "loess", "linear", "constant"
        ] = "first_diff",
        use_peaks_only: bool = True,
        apply_hann_window: bool = True,
        zero_padding_factor: int = 2,
        round_to_closest_integer: bool = True,
        validate_with_acf: bool = False,
        sampling_interval: float = 1.0,
        magnitude_threshold: Optional[
            float
        ] = 0.05,  # Default relative threshold (5% of max)
        relative_threshold: bool = True,  # Interpret threshold as a fraction of max FFT magnitude
        exclude_zero: bool = False,
    ) -> List[Tuple[float, float]]:
        """
        Identify dominant seasonal periods in a time series using FFT.

        Parameters:
        - target_values: pd.Series
            Input time series data. NaN entries are dropped before analysis.
        - max_top_k: int
            Maximum number of dominant periods to return.
        - do_detrend: bool
            If True, detrend the signal using ``detrend_type``.
        - detrend_type: str
            One of "first_diff", "loess", "linear", "constant"; passed to the
            module-level ``detrend`` helper.
        - use_peaks_only: bool
            If True, consider only local peaks in the FFT magnitude spectrum.
        - apply_hann_window: bool
            If True, apply a Hann window to reduce spectral leakage.
        - zero_padding_factor: int
            Factor by which to zero-pad the signal for finer frequency resolution.
        - round_to_closest_integer: bool
            If True, round the detected periods to the nearest integer.
        - validate_with_acf: bool
            If True, validate detected periods against the autocorrelation function.
        - sampling_interval: float
            Time interval between consecutive samples.
        - magnitude_threshold: Optional[float]
            Threshold to filter out less significant frequency components.
            Default is 0.05, interpreted as 5% of the maximum FFT magnitude if relative_threshold is True.
        - relative_threshold: bool
            If True, the `magnitude_threshold` is interpreted as a fraction of the maximum FFT magnitude.
            Otherwise, it is treated as an absolute threshold value.
        - exclude_zero: bool
            If True, exclude periods of 0 from the results.

        Returns:
        - List[Tuple[float, float]]:
            A list of (period, magnitude) tuples, sorted in descending order by magnitude.
            Empty if the series has no non-NaN values.
        """
        # Convert the Pandas Series to a NumPy array
        values = np.array(target_values, dtype=float)

        # Quick hack to ignore the test_X
        # (Assuming train_X target is not NaN, and test_X target is NaN)
        # Dropping all the NaN values
        values = values[~np.isnan(values)]

        N_original = len(values)

        # Nothing to analyse (e.g. all targets are NaN): the FFT below would
        # raise on an empty signal, so report "no seasonality" instead.
        if N_original == 0:
            return []

        # NaN-free signal before detrending/windowing/padding; used for the
        # optional ACF validation further down.
        acf_input = values.copy()

        # Detrend the signal if requested
        if do_detrend:
            values = detrend(values, detrend_type)

        # Apply a Hann window to reduce spectral leakage
        if apply_hann_window:
            window = np.hanning(N_original)
            values = values * window

        # Zero-pad the signal for improved frequency resolution
        if zero_padding_factor > 1:
            padded_length = int(N_original * zero_padding_factor)
            padded_values = np.zeros(padded_length)
            padded_values[:N_original] = values
            values = padded_values
            N = padded_length
        else:
            N = N_original

        # Compute the FFT (using rfft) and obtain frequency bins
        fft_values = fft.rfft(values)
        fft_magnitudes = np.abs(fft_values)
        freqs = np.fft.rfftfreq(N, d=sampling_interval)

        # Exclude the DC component (0 Hz) to avoid bias from the signal's mean
        fft_magnitudes[0] = 0.0

        # Determine the threshold (absolute value)
        if magnitude_threshold is not None and relative_threshold:
            threshold_value = magnitude_threshold * np.max(fft_magnitudes)
        else:
            threshold_value = magnitude_threshold

        # Identify dominant frequencies
        if use_peaks_only:
            if threshold_value is not None:
                peak_indices, _ = find_peaks(fft_magnitudes, height=threshold_value)
            else:
                peak_indices, _ = find_peaks(fft_magnitudes)
            if len(peak_indices) == 0:
                # Fallback to considering all frequency bins if no peaks are found
                peak_indices = np.arange(len(fft_magnitudes))
            # Sort the peak indices by magnitude in descending order
            sorted_peak_indices = peak_indices[
                np.argsort(fft_magnitudes[peak_indices])[::-1]
            ]
            top_indices = sorted_peak_indices[:max_top_k]
        else:
            sorted_indices = np.argsort(fft_magnitudes)[::-1]
            if threshold_value is not None:
                # Filter with a boolean mask so the result stays an ndarray;
                # the steps below index ``top_indices`` with boolean/integer
                # arrays, which a plain Python list does not support.
                keep = fft_magnitudes[sorted_indices] >= threshold_value
                sorted_indices = sorted_indices[keep]
            top_indices = sorted_indices[:max_top_k]

        # Convert frequencies to periods (avoiding division by zero)
        periods = np.zeros_like(freqs)
        non_zero = freqs > 0
        periods[non_zero] = 1.0 / freqs[non_zero]
        top_periods = periods[top_indices]

        logger.debug(f"Top periods: {top_periods}")

        # Optionally round the periods to the nearest integer
        if round_to_closest_integer:
            top_periods = np.round(top_periods)

        # Filter out zero periods if requested
        if exclude_zero:
            non_zero_mask = top_periods != 0
            top_periods = top_periods[non_zero_mask]
            top_indices = top_indices[non_zero_mask]

        # Keep unique periods only. ``return_index`` yields the first
        # occurrence of each value, i.e. the strongest bin for that period,
        # because the candidates are ordered by descending magnitude.
        if len(top_periods) > 0:
            unique_period_indices = np.unique(top_periods, return_index=True)[1]
            top_periods = top_periods[unique_period_indices]
            top_indices = top_indices[unique_period_indices]

        # Pair each period with its corresponding magnitude
        results = list(zip(top_periods, fft_magnitudes[top_indices]))

        # Validate with ACF if requested and filter the results accordingly
        if validate_with_acf:
            # Compute ACF on the original (non-padded, non-windowed) NaN-free
            # signal. Note: this signal is NOT detrended.
            acf_values = acf(
                acf_input,
                nlags=N_original,
                fft=True,
            )
            # 1.96 / sqrt(N) is the usual 95% significance bound for the ACF
            acf_peak_indices, _ = find_peaks(
                acf_values, height=1.96 / np.sqrt(N_original)
            )
            validated_results = []
            for period, mag in results:
                period_int = int(round(period))
                if period_int < len(acf_values) and any(
                    abs(period_int - peak) <= 1 for peak in acf_peak_indices
                ):
                    validated_results.append((period, mag))
            # Keep the unvalidated results if validation rejects everything
            if validated_results:
                results = validated_results

        # Ensure the final results are sorted in descending order by magnitude
        results.sort(key=lambda x: x[1], reverse=True)

        return results
285
+
286
+
287
def detrend(
    x: np.ndarray,
    detrend_type: Literal["first_diff", "loess", "linear", "constant"],
) -> np.ndarray:
    """Remove the trend from a 1-D signal.

    Parameters:
    - x: np.ndarray
        Input signal.
    - detrend_type: str
        "first_diff" -- first differences (first element becomes 0, length is kept);
        "loess"      -- subtract a LOWESS fit (frac=0.1) over the sample index;
        "linear" / "constant" -- delegate to ``scipy.signal.detrend``.

    Returns:
    - np.ndarray:
        Detrended signal with the same length as ``x``. An empty input yields
        an empty copy.

    Raises:
    - ValueError: if ``detrend_type`` is not one of the supported methods.
    """
    if detrend_type not in ("first_diff", "loess", "linear", "constant"):
        raise ValueError(f"Invalid detrend method: {detrend_type}")

    x = np.asarray(x)

    # Nothing to detrend; also avoids ``x[0]`` failing in the first_diff branch.
    if x.size == 0:
        return x.copy()

    if detrend_type == "first_diff":
        # Prepending x[0] keeps the output the same length as the input.
        return np.diff(x, prepend=x[0])

    if detrend_type == "loess":
        from statsmodels.api import nonparametric

        indices = np.arange(len(x))
        lowess = nonparametric.lowess(x, indices, frac=0.1)
        trend = lowess[:, 1]  # column 1 holds the fitted values
        return x - trend

    # "linear" or "constant"
    from scipy.signal import detrend as scipy_detrend

    return scipy_detrend(x, type=detrend_type)
@@ -0,0 +1,88 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from typing import List, Dict, Optional
4
+
5
+ import gluonts.time_feature
6
+
7
+ from tabpfn_time_series.features.feature_generator_base import (
8
+ FeatureGenerator,
9
+ )
10
+
11
+
12
class RunningIndexFeature(FeatureGenerator):
    """Feature generator that numbers the rows of a frame starting at 0."""

    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with a ``running_index`` column (0 .. len - 1)."""
        indexed = df.copy()
        row_count = len(indexed)
        indexed["running_index"] = range(row_count)
        return indexed
17
+
18
+
19
class CalendarFeature(FeatureGenerator):
    """Add calendar components and cyclic (sin/cos) calendar encodings.

    Values are derived from the ``timestamp`` level of the frame's index.
    """

    def __init__(
        self,
        components: Optional[List[str]] = None,
        seasonal_features: Optional[Dict[str, List[float]]] = None,
    ):
        """
        Parameters:
        - components: Optional[List[str]]
            Attribute names read from the timestamp index and stored as
            columns of the same name. Falls back to ``["year"]`` when not
            given (or empty).
        - seasonal_features: Optional[Dict[str, List[float]]]
            Maps a feature name ``<name>`` (resolved to
            ``gluonts.time_feature.<name>_index``) to the periods used for its
            sin/cos encoding. A value of ``None`` stores the raw index instead.
            Falls back to the built-in table below when not given (or empty).
        """
        self.components = components or ["year"]
        self.seasonal_features = seasonal_features or {
            # (feature, natural seasonality)
            "second_of_minute": [60],
            "minute_of_hour": [60],
            "hour_of_day": [24],
            "day_of_week": [7],
            "day_of_month": [30.5],
            "day_of_year": [365],
            "week_of_year": [52],
            "month_of_year": [12],
        }

    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with the configured calendar columns added.

        Column names are ``<feature>_sin`` / ``<feature>_cos`` when a feature
        has a single period. When a feature lists several periods, each one
        gets its own ``<feature>_<period>_sin`` / ``<feature>_<period>_cos``
        pair so that the encodings do not overwrite each other.
        """
        df = df.copy()
        timestamps = df.index.get_level_values("timestamp")

        # Add basic calendar components
        for component in self.components:
            df[component] = getattr(timestamps, component)

        # Add seasonal features
        for feature_name, periods in self.seasonal_features.items():
            feature_func = getattr(gluonts.time_feature, f"{feature_name}_index")
            feature = feature_func(timestamps).astype(np.int32)

            if periods is None:
                df[feature_name] = feature
                continue

            # Only qualify names with the period when that is needed to keep
            # them distinct; the single-period names stay as they were.
            qualify = len(periods) > 1
            for period in periods:
                prefix = f"{feature_name}_{period}" if qualify else feature_name
                # period - 1: adjust for 0-based indexing of the feature values
                angle = 2 * np.pi * feature / (period - 1)
                df[f"{prefix}_sin"] = np.sin(angle)
                df[f"{prefix}_cos"] = np.cos(angle)

        return df
60
+
61
+
62
class AdditionalCalendarFeature(CalendarFeature):
    """``CalendarFeature`` with extra seasonal features on top of the defaults."""

    def __init__(
        self,
        components: Optional[List[str]] = None,
        additional_seasonal_features: Optional[Dict[str, List[float]]] = None,
    ):
        """
        Parameters:
        - components: Optional[List[str]]
            Forwarded to ``CalendarFeature``.
        - additional_seasonal_features: Optional[Dict[str, List[float]]]
            Extra ``feature name -> periods`` entries. May be omitted, in
            which case only the default seasonal features are used. If a key
            also exists in the defaults, the default entry takes precedence.
        """
        super().__init__(components=components)

        # ``or {}`` keeps the documented default (None) usable: unpacking None
        # with ``**`` raises TypeError.
        self.seasonal_features = {
            **(additional_seasonal_features or {}),
            **self.seasonal_features,
        }
74
+
75
+
76
class PeriodicSinCosineFeature(FeatureGenerator):
    """Encode the row position as sin/cos waves of the given periods."""

    def __init__(self, periods: List[float], name_suffix: str = None):
        """
        Parameters:
        - periods: List[float]
            Periods (in rows) of the waves to generate.
        - name_suffix: str
            If set, columns are named ``sin_<name_suffix>_<i>`` /
            ``cos_<name_suffix>_<i>`` with ``i`` the position in ``periods``;
            otherwise the period itself is used as the suffix.
        """
        self.periods = periods
        self.name_suffix = name_suffix

    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with one sin and one cos column per period."""
        result = df.copy()
        for idx, period in enumerate(self.periods):
            if self.name_suffix:
                label = f"{self.name_suffix}_{idx}"
            else:
                label = f"{period}"
            # Phase of each row on a wave of this period
            phase = 2 * np.pi * np.arange(len(result)) / period
            result[f"sin_{label}"] = np.sin(phase)
            result[f"cos_{label}"] = np.cos(phase)

        return result
@@ -0,0 +1,21 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+ import pandas as pd
4
+
5
+
6
class FeatureGenerator(ABC):
    """Abstract base class for feature generators.

    Subclasses implement ``generate``; instances are also callable, which
    simply forwards to ``generate``.
    """

    @abstractmethod
    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Generate features for the given dataframe"""

    def __call__(self, df: pd.DataFrame) -> pd.DataFrame:
        generated = self.generate(df)
        return generated

    def __str__(self) -> str:
        # "<ClassName>_<instance attribute dict>"
        class_name = self.__class__.__name__
        attributes = self.__dict__
        return "{}_{}".format(class_name, attributes)

    def __repr__(self) -> str:
        return self.__str__()
@@ -0,0 +1,53 @@
1
+ from typing import List, Tuple
2
+
3
+ import pandas as pd
4
+
5
+ from autogluon.timeseries import TimeSeriesDataFrame
6
+ from tabpfn_time_series.features.feature_generator_base import (
7
+ FeatureGenerator,
8
+ )
9
+
10
+
11
class FeatureTransformer:
    """Apply a sequence of feature generators to train and test data together."""

    def __init__(self, feature_generators: List[FeatureGenerator]):
        self.feature_generators = feature_generators

    def transform(
        self,
        train_tsdf: TimeSeriesDataFrame,
        test_tsdf: TimeSeriesDataFrame,
        target_column: str = "target",
    ) -> Tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]:
        """Transform both train and test data with the configured feature generators

        Train and test frames are concatenated, every generator is applied per
        ``item_id`` group in order, and the result is split back at the
        original train length.

        Returns the ``(train, test)`` pair with the generated columns added.
        """
        self._validate_input(train_tsdf, test_tsdf, target_column)

        n_train = len(train_tsdf)
        combined = pd.concat([train_tsdf, test_tsdf])

        # Apply all feature generators, one item at a time
        for generator in self.feature_generators:
            grouped = combined.groupby(level="item_id", group_keys=False)
            combined = grouped.apply(generator)

        # Split back into train and test parts
        train_out = combined.iloc[:n_train]
        test_out = combined.iloc[n_train:]

        # Sanity checks: the split must still separate known from unknown targets
        assert (
            train_out[target_column].notna().all()
        ), "All target values in train_tsdf should be non-NaN"
        assert test_out[target_column].isna().all()

        return train_out, test_out

    @staticmethod
    def _validate_input(
        train_tsdf: TimeSeriesDataFrame,
        test_tsdf: TimeSeriesDataFrame,
        target_column: str,
    ):
        """Raise ValueError if the target column is missing from the training
        data or the test data carries any non-NaN target value."""
        has_target = target_column in train_tsdf.columns
        if not has_target:
            raise ValueError(
                f"Target column '{target_column}' not found in training data"
            )

        if test_tsdf[target_column].notna().any():
            raise ValueError("Test data should not contain target values")
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tabpfn_time_series
3
- Version: 0.1.3
4
- Summary: Zero-shot time series forecasting with TabPFN
3
+ Version: 1.0.0
4
+ Summary: Zero-shot time series forecasting with TabPFNv2
5
5
  Project-URL: Homepage, https://github.com/liam-sbhoo/tabpfn-time-series
6
6
  Project-URL: Bug Tracker, https://github.com/liam-sbhoo/tabpfn-time-series/issues
7
7
  Author-email: Liam Shi Bin Hoo <hoos@tf.uni-freiburg.de>
@@ -14,32 +14,39 @@ Requires-Dist: autogluon-timeseries>=1.2
14
14
  Requires-Dist: datasets>=3.3.2
15
15
  Requires-Dist: gluonts>=0.16.0
16
16
  Requires-Dist: pandas<2.2.0,>=2.1.2
17
- Requires-Dist: tabpfn-client>=0.1.1
18
- Requires-Dist: tabpfn>=2.0.0
17
+ Requires-Dist: python-dotenv>=1.1.0
18
+ Requires-Dist: pyyaml>=6.0.1
19
+ Requires-Dist: tabpfn-client>=0.1.7
20
+ Requires-Dist: tabpfn>=2.0.9
19
21
  Requires-Dist: tqdm
20
22
  Provides-Extra: dev
21
23
  Requires-Dist: build; extra == 'dev'
22
24
  Requires-Dist: jupyter; extra == 'dev'
23
25
  Requires-Dist: pre-commit; extra == 'dev'
24
26
  Requires-Dist: ruff; extra == 'dev'
27
+ Requires-Dist: submitit>=1.5.2; extra == 'dev'
25
28
  Requires-Dist: twine; extra == 'dev'
29
+ Requires-Dist: wandb>=0.19.8; extra == 'dev'
26
30
  Description-Content-Type: text/markdown
27
31
 
28
- # Zero-Shot Time Series Forecasting with TabPFN
32
+ # TabPFN-TS
33
+
34
+ > Zero-Shot Time Series Forecasting with TabPFNv2
29
35
 
30
36
  [![PyPI version](https://badge.fury.io/py/tabpfn-time-series.svg)](https://badge.fury.io/py/tabpfn-time-series)
31
37
  [![colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liam-sbhoo/tabpfn-time-series/blob/main/demo.ipynb)
32
38
  [![Discord](https://img.shields.io/discord/1285598202732482621?color=7289da&label=Discord&logo=discord&logoColor=ffffff)](https://discord.com/channels/1285598202732482621/)
33
- [![arXiv](https://img.shields.io/badge/arXiv-2501.02945-<COLOR>.svg)](https://arxiv.org/abs/2501.02945)
39
+ [![arXiv](https://img.shields.io/badge/arXiv-2501.02945-<COLOR>.svg)](https://arxiv.org/abs/2501.02945v3)
34
40
 
35
41
  ## 📌 News
42
+ - **27-05-2025**: 📝 New **[paper](https://arxiv.org/abs/2501.02945v3)** version and **v1.0.0** release! Strong [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) results, new AutoSeasonalFeatures, improved CalendarFeatures.
36
43
  - **27-01-2025**: 🚀 Ranked _**1st**_ on [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark<sup>[1]</sup>!
37
- - **10-10-2024**: 🚀 TabPFN-TS [paper](https://arxiv.org/abs/2501.02945) accepted to NeurIPS 2024 [TRL](https://table-representation-learning.github.io/NeurIPS2024/) and [TSALM](https://neurips-time-series-workshop.github.io/) workshops!
44
+ - **10-10-2024**: 🚀 TabPFN-TS [paper](https://arxiv.org/abs/2501.02945v2) accepted to NeurIPS 2024 [TRL](https://table-representation-learning.github.io/NeurIPS2024/) and [TSALM](https://neurips-time-series-workshop.github.io/) workshops!
38
45
 
39
46
  _[1] Last checked on: 10/03/2025_
40
47
 
41
48
  ## ✨ Introduction
42
- We demonstrate that the tabular foundation model **[TabPFN](https://github.com/PriorLabs/TabPFN)**, when paired with minimal featurization, can perform zero-shot time series forecasting. Its performance on point forecasting matches or even slightly outperforms state-of-the-art methods.
49
+ We demonstrate that the tabular foundation model **[TabPFNv2](https://github.com/PriorLabs/TabPFN)**, combined with lightweight feature engineering, enables zero-shot time series forecasting for both point and probabilistic tasks. On the **[GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval)** benchmark, our method achieves performance on par with top-tier models across both evaluation metrics.
43
50
 
44
51
  ## 📖 How does it work?
45
52
 
@@ -49,27 +56,22 @@ Our work proposes to frame **univariate time series forecasting** as a **tabular
49
56
 
50
57
  Concretely, we:
51
58
  1. Transform a time series into a table
52
- 2. Extract features from timestamp and add them to the table
53
- 3. Perform regression on the table using TabPFN
59
+ 2. Extract features and add them to the table
60
+ 3. Perform regression on the table using TabPFNv2
54
61
  4. Use regression results as time series forecasting outputs
55
62
 
56
- For more details, please refer to our [paper](https://arxiv.org/abs/2501.02945) and our [poster](docs/tabpfn-ts-neurips-poster.pdf) (presented at NeurIPS 2024 TRL and TSALM workshops).
63
+ For more details, please refer to our [paper](https://arxiv.org/abs/2501.02945v3).
64
+ <!-- and our [poster](docs/tabpfn-ts-neurips-poster.pdf) (presented at NeurIPS 2024 TRL and TSALM workshops). -->
57
65
 
58
66
  ## 👉 **Why give us a try?**
59
67
  - **Zero-shot forecasting**: this method is extremely fast and requires no training, making it highly accessible for experimenting with your own problems.
60
68
  - **Point and probabilistic forecasting**: it provides accurate point forecasts as well as probabilistic forecasts.
61
69
  - **Support for exogenous variables**: if you have exogenous variables, this method can seamlessly incorporate them into the forecasting model.
62
70
 
63
- On top of that, thanks to **[tabpfn-client](https://github.com/automl/tabpfn-client)** from **[Prior Labs](https://priorlabs.ai)**, you won’t even need your own GPU to run fast inference with TabPFN. 😉 We have included `tabpfn-client` as the default engine in our implementation.
71
+ On top of that, thanks to **[tabpfn-client](https://github.com/automl/tabpfn-client)** from **[Prior Labs](https://priorlabs.ai)**, you won’t even need your own GPU to run fast inference with TabPFNv2. 😉 We have included `tabpfn-client` as the default engine in our implementation.
64
72
 
65
73
  ## How to use it?
66
74
 
67
75
  [![colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liam-sbhoo/tabpfn-time-series/blob/main/demo.ipynb)
68
76
 
69
77
  The demo should explain it all. 😉
70
-
71
- ## 📊 GIFT-EVAL Benchmark
72
-
73
- We have submitted our results to the [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark. Stay tuned for results!
74
-
75
- For more details regarding the evaluation setup, please refer to [README.md](gift_eval/README.md).
@@ -0,0 +1,15 @@
1
+ tabpfn_time_series/__init__.py,sha256=3XGvQieVbONwhVtn1rITet6HNiTMWQTxHm2xLlGI5ew,314
2
+ tabpfn_time_series/data_preparation.py,sha256=iNW7sAnRkTgmzzOEHBhkkTwm_lQ3p_Q9xgAQ5PbkOts,5416
3
+ tabpfn_time_series/defaults.py,sha256=u2_JnwxiZ5NNibzyNpsE63KuP3TcmOL1iAP8llZ2rJk,238
4
+ tabpfn_time_series/plot.py,sha256=bwSYcWBanzPrUxXKFsbqG8fyGsOJZfgU2v3NsxzTSXo,6571
5
+ tabpfn_time_series/predictor.py,sha256=JzuV34zERf1XDLacGzSFJb-o077qd7GlKC6lvD62EPk,1457
6
+ tabpfn_time_series/tabpfn_worker.py,sha256=zvFwg4Dc01_m5emqmVITBr6W_cNZ04tMyntmj40pyPE,8299
7
+ tabpfn_time_series/features/__init__.py,sha256=lzdZWkEfntfg3ZHqNNbfbg-3o_VIzju0tebdRu3AzF4,421
8
+ tabpfn_time_series/features/auto_features.py,sha256=3OqqY2h7umcoLjLx4hOXypLTjwzrMtd6cQKTNi83vrU,11561
9
+ tabpfn_time_series/features/basic_features.py,sha256=OV3B__S30-CX88vGjwYQDWqAbJajQw80PxcnvJVUbm4,2955
10
+ tabpfn_time_series/features/feature_generator_base.py,sha256=jtySWLJyX4E31v6CbX44EHa8cdz7OMyauf4ltNEQeAQ,534
11
+ tabpfn_time_series/features/feature_transformer.py,sha256=mUsbnPUhJ4lPcnGWk8Ag1hgCOE1V5I0iQRT4VFgQEso,1763
12
+ tabpfn_time_series-1.0.0.dist-info/METADATA,sha256=CvXqIOHNTKyd-zpCednsqa3FloPk6lFJ4ISG0eSEWx4,4434
13
+ tabpfn_time_series-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
+ tabpfn_time_series-1.0.0.dist-info/licenses/LICENSE.txt,sha256=iwhPL7kIWQG6gyLZZwIMDItGrNgxMDIq9itxkUSMapY,11345
15
+ tabpfn_time_series-1.0.0.dist-info/RECORD,,
@@ -1,78 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
- from typing import Tuple, List, Callable
4
-
5
- import gluonts.time_feature
6
- from autogluon.timeseries import TimeSeriesDataFrame
7
-
8
-
9
- class DefaultFeatures:
10
- @staticmethod
11
- def add_running_index(df: pd.DataFrame) -> pd.Series:
12
- df["running_index"] = range(len(df))
13
- return df
14
-
15
- @staticmethod
16
- def add_calendar_features(df: pd.DataFrame) -> pd.DataFrame:
17
- CALENDAR_COMPONENT = [
18
- "year",
19
- # "month",
20
- # "day",
21
- ]
22
-
23
- CALENDAR_FEATURES = [
24
- # (feature, natural seasonality)
25
- ("hour_of_day", 24),
26
- ("day_of_week", 7),
27
- ("day_of_month", 30.5),
28
- ("day_of_year", 365),
29
- ("week_of_year", 52),
30
- ("month_of_year", 12),
31
- ]
32
-
33
- timestamps = df.index.get_level_values("timestamp")
34
-
35
- for component_name in CALENDAR_COMPONENT:
36
- df[component_name] = getattr(timestamps, component_name)
37
-
38
- for feature_name, seasonality in CALENDAR_FEATURES:
39
- feature_func = getattr(gluonts.time_feature, f"{feature_name}_index")
40
- feature = feature_func(timestamps).astype(np.int32)
41
- if seasonality is not None:
42
- df[f"{feature_name}_sin"] = np.sin(
43
- 2 * np.pi * feature / (seasonality - 1)
44
- ) # seasonality - 1 because the value starts from 0
45
- df[f"{feature_name}_cos"] = np.cos(
46
- 2 * np.pi * feature / (seasonality - 1)
47
- )
48
- else:
49
- df[feature_name] = feature
50
-
51
- return df
52
-
53
-
54
- class FeatureTransformer:
55
- @staticmethod
56
- def add_features(
57
- train_tsdf: TimeSeriesDataFrame,
58
- test_tsdf: TimeSeriesDataFrame,
59
- feature_generators: List[Callable[[TimeSeriesDataFrame], TimeSeriesDataFrame]],
60
- target_column: str = "target",
61
- ) -> Tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]:
62
- assert target_column in train_tsdf.columns
63
- assert test_tsdf[target_column].isna().all()
64
-
65
- # Join train and test tsdf
66
- tsdf = pd.concat([train_tsdf, test_tsdf])
67
-
68
- # Apply feature generators
69
- for func in feature_generators:
70
- tsdf = tsdf.groupby(level="item_id", group_keys=False).apply(func)
71
-
72
- # Split train and test tsdf
73
- train_tsdf = tsdf.iloc[: len(train_tsdf)]
74
- test_tsdf = tsdf.iloc[len(train_tsdf) :]
75
-
76
- assert test_tsdf[target_column].isna().all()
77
-
78
- return train_tsdf, test_tsdf
@@ -1,11 +0,0 @@
1
- tabpfn_time_series/__init__.py,sha256=brJLLVOis4tBGOmNk6PCjyk_RaOvFITZgaYChOTVqSo,353
2
- tabpfn_time_series/data_preparation.py,sha256=iNW7sAnRkTgmzzOEHBhkkTwm_lQ3p_Q9xgAQ5PbkOts,5416
3
- tabpfn_time_series/defaults.py,sha256=u2_JnwxiZ5NNibzyNpsE63KuP3TcmOL1iAP8llZ2rJk,238
4
- tabpfn_time_series/feature.py,sha256=_9FxfQfgPOOO1MiT8hB8523eZ3Nc5oKuoY7vcohKZZc,2531
5
- tabpfn_time_series/plot.py,sha256=bwSYcWBanzPrUxXKFsbqG8fyGsOJZfgU2v3NsxzTSXo,6571
6
- tabpfn_time_series/predictor.py,sha256=JzuV34zERf1XDLacGzSFJb-o077qd7GlKC6lvD62EPk,1457
7
- tabpfn_time_series/tabpfn_worker.py,sha256=zvFwg4Dc01_m5emqmVITBr6W_cNZ04tMyntmj40pyPE,8299
8
- tabpfn_time_series-0.1.3.dist-info/METADATA,sha256=KQZBVKZgMX4e3uxk2LTCuSwruATLowUmgrP6wbcLMB8,4158
9
- tabpfn_time_series-0.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
- tabpfn_time_series-0.1.3.dist-info/licenses/LICENSE.txt,sha256=iwhPL7kIWQG6gyLZZwIMDItGrNgxMDIq9itxkUSMapY,11345
11
- tabpfn_time_series-0.1.3.dist-info/RECORD,,