tabpfn-time-series 0.1.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,12 @@
1
- from .feature import DefaultFeatures, FeatureTransformer
1
+ from .features import FeatureTransformer
2
2
  from .predictor import TabPFNTimeSeriesPredictor, TabPFNMode
3
+ from .defaults import TABPFN_TS_DEFAULT_QUANTILE_CONFIG
3
4
 
4
5
  __version__ = "0.1.0"
5
6
 
6
7
  __all__ = [
7
- "DefaultFeatures",
8
8
  "FeatureTransformer",
9
9
  "TabPFNTimeSeriesPredictor",
10
10
  "TabPFNMode",
11
+ "TABPFN_TS_DEFAULT_QUANTILE_CONFIG",
11
12
  ]
@@ -0,0 +1,17 @@
1
+ from .basic_features import (
2
+ RunningIndexFeature,
3
+ CalendarFeature,
4
+ AdditionalCalendarFeature,
5
+ PeriodicSinCosineFeature,
6
+ )
7
+ from .auto_features import AutoSeasonalFeature
8
+ from .feature_transformer import FeatureTransformer
9
+
10
+ __all__ = [
11
+ "RunningIndexFeature",
12
+ "CalendarFeature",
13
+ "AdditionalCalendarFeature",
14
+ "AutoSeasonalFeature",
15
+ "PeriodicSinCosineFeature",
16
+ "FeatureTransformer",
17
+ ]
@@ -0,0 +1,307 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from typing import List, Optional, Tuple, Literal
4
+
5
+ import logging
6
+
7
+ from scipy import fft
8
+ from scipy.signal import find_peaks
9
+ from statsmodels.tsa.stattools import acf
10
+
11
+ from tabpfn_time_series.features.feature_generator_base import (
12
+ FeatureGenerator,
13
+ )
14
+ from tabpfn_time_series.features.basic_features import (
15
+ PeriodicSinCosineFeature,
16
+ )
17
+
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class AutoSeasonalFeature(FeatureGenerator):
    """Detect dominant seasonal periods with an FFT and add sin/cos features for them.

    The periods are estimated from the non-NaN values of ``df.target`` and the
    resulting columns are named ``sin_#i`` / ``cos_#i`` (``i`` = rank of the
    period) so that every series ends up with the same column names.
    """

    class Config:
        # Defaults collected by ``__init__`` and forwarded as keyword arguments
        # to ``find_seasonal_periods`` (see that method for their meaning).
        max_top_k: int = 5
        do_detrend: bool = True
        detrend_type: Literal["first_diff", "loess", "linear", "constant"] = "linear"
        use_peaks_only: bool = True
        apply_hann_window: bool = True
        zero_padding_factor: int = 2
        round_to_closest_integer: bool = True
        validate_with_acf: bool = False
        sampling_interval: float = 1.0
        magnitude_threshold: Optional[float] = 0.05
        relative_threshold: bool = True
        exclude_zero: bool = True

    def __init__(self, config: Optional[dict] = None):
        """Build the effective config from ``Config`` defaults plus ``config`` overrides.

        Parameters:
        - config: Optional[dict]
            Keys override the defaults declared on ``Config``.
        """
        # Create default config from Config class (skip dunder attributes)
        default_config = {
            k: v for k, v in vars(self.Config).items() if not k.startswith("__")
        }

        # Initialize config with defaults
        self.config = default_config.copy()

        # Update with user-provided config if any
        if config is not None:
            self.config.update(config)

        # Validate config parameters
        self._validate_config()

        logger.debug("Initialized AutoSeasonalFeature with config: %s", self.config)

    def _validate_config(self):
        """Validate configuration parameters, replacing invalid values with safe ones."""
        if self.config["max_top_k"] < 1:
            logger.warning("max_top_k must be at least 1, setting to 1")
            self.config["max_top_k"] = 1

        if self.config["zero_padding_factor"] < 1:
            logger.warning("zero_padding_factor must be at least 1, setting to 1")
            self.config["zero_padding_factor"] = 1

        if self.config["detrend_type"] not in [
            "first_diff",
            "loess",
            "linear",
            "constant",
        ]:
            logger.warning(
                f"Invalid detrend_type: {self.config['detrend_type']}, using 'linear'"
            )
            self.config["detrend_type"] = "linear"

    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with ``sin_#i`` / ``cos_#i`` columns added.

        Exactly ``max_top_k`` sin/cos pairs are produced: detected periods
        first, then zero-filled placeholder columns for the remaining slots.
        """
        df = df.copy()

        # Detect seasonal periods from target data
        detected_periods_and_magnitudes = self.find_seasonal_periods(
            df.target, **self.config
        )
        logger.debug(
            "Found %d seasonal periods: %s",
            len(detected_periods_and_magnitudes),
            detected_periods_and_magnitudes,
        )

        # Extract just the periods (without magnitudes)
        periods = [period for period, _ in detected_periods_and_magnitudes]

        # Generate features for detected periods using PeriodicSinCosineFeature
        if periods:
            feature_generator = PeriodicSinCosineFeature(periods=periods)
            df = feature_generator.generate(df)

            # Standardize column names for consistency across time series
            renamed_columns = {}
            for i, period in enumerate(periods):
                renamed_columns[f"sin_{period}"] = f"sin_#{i}"
                renamed_columns[f"cos_{period}"] = f"cos_#{i}"

            df = df.rename(columns=renamed_columns)

        # Add placeholder zero columns for missing periods up to max_top_k
        for i in range(len(periods), self.config["max_top_k"]):
            df[f"sin_#{i}"] = 0.0
            df[f"cos_#{i}"] = 0.0

        return df

    @staticmethod
    def find_seasonal_periods(
        target_values: pd.Series,
        max_top_k: int = 10,
        do_detrend: bool = True,
        detrend_type: Literal[
            "first_diff", "loess", "linear", "constant"
        ] = "first_diff",
        use_peaks_only: bool = True,
        apply_hann_window: bool = True,
        zero_padding_factor: int = 2,
        round_to_closest_integer: bool = True,
        validate_with_acf: bool = False,
        sampling_interval: float = 1.0,
        magnitude_threshold: Optional[
            float
        ] = 0.05,  # Default relative threshold (5% of max)
        relative_threshold: bool = True,  # Interpret threshold as a fraction of max FFT magnitude
        exclude_zero: bool = False,
    ) -> List[Tuple[float, float]]:
        """
        Identify dominant seasonal periods in a time series using FFT.

        Parameters:
        - target_values: pd.Series
            Input time series data. NaN entries are dropped before analysis.
        - max_top_k: int
            Maximum number of dominant periods to return.
        - do_detrend: bool
            If True, detrend the signal with the method named by `detrend_type`.
        - detrend_type: str
            One of "first_diff", "loess", "linear" or "constant"; passed to `detrend`.
        - use_peaks_only: bool
            If True, consider only local peaks in the FFT magnitude spectrum.
        - apply_hann_window: bool
            If True, apply a Hann window to reduce spectral leakage.
        - zero_padding_factor: int
            Factor by which to zero-pad the signal for finer frequency resolution.
        - round_to_closest_integer: bool
            If True, round the detected periods to the nearest integer.
        - validate_with_acf: bool
            If True, validate detected periods against the autocorrelation function.
            If no period survives validation, the unvalidated results are kept.
        - sampling_interval: float
            Time interval between consecutive samples.
        - magnitude_threshold: Optional[float]
            Threshold to filter out less significant frequency components.
            Default is 0.05, interpreted as 5% of the maximum FFT magnitude if relative_threshold is True.
        - relative_threshold: bool
            If True, the `magnitude_threshold` is interpreted as a fraction of the maximum FFT magnitude.
            Otherwise, it is treated as an absolute threshold value.
        - exclude_zero: bool
            If True, exclude periods of 0 from the results.

        Returns:
        - List[Tuple[float, float]]:
            A list of (period, magnitude) tuples, sorted in descending order by magnitude.
            Empty if `target_values` contains no non-NaN value.
        """
        # Convert the Pandas Series to a NumPy array
        values = np.array(target_values, dtype=float)

        # Quick hack to ignore the test_X
        # (Assuming train_X target is not NaN, and test_X target is NaN)
        # Dropping all the NaN values
        values = values[~np.isnan(values)]

        N_original = len(values)

        # Nothing to analyse: avoid crashing in detrend / np.max on empty input
        if N_original == 0:
            return []

        # Detrend the signal with the requested method
        if do_detrend:
            values = detrend(values, detrend_type)

        # Keep the NaN-free, (optionally) detrended, un-windowed and un-padded
        # signal for the ACF validation below. The steps that follow create new
        # arrays, so this reference is not modified.
        acf_signal = values

        # Apply a Hann window to reduce spectral leakage
        if apply_hann_window:
            window = np.hanning(N_original)
            values = values * window

        # Zero-pad the signal for improved frequency resolution
        if zero_padding_factor > 1:
            padded_length = int(N_original * zero_padding_factor)
            padded_values = np.zeros(padded_length)
            padded_values[:N_original] = values
            values = padded_values
            N = padded_length
        else:
            N = N_original

        # Compute the FFT (using rfft) and obtain frequency bins
        fft_values = fft.rfft(values)
        fft_magnitudes = np.abs(fft_values)
        freqs = np.fft.rfftfreq(N, d=sampling_interval)

        # Exclude the DC component (0 Hz) to avoid bias from the signal's mean
        fft_magnitudes[0] = 0.0

        # Determine the threshold (absolute value)
        if magnitude_threshold is not None and relative_threshold:
            threshold_value = magnitude_threshold * np.max(fft_magnitudes)
        else:
            threshold_value = magnitude_threshold

        # Identify dominant frequencies
        if use_peaks_only:
            if threshold_value is not None:
                peak_indices, _ = find_peaks(fft_magnitudes, height=threshold_value)
            else:
                peak_indices, _ = find_peaks(fft_magnitudes)
            if len(peak_indices) == 0:
                # Fallback to considering all frequency bins if no peaks are found
                peak_indices = np.arange(len(fft_magnitudes))
            # Sort the peak indices by magnitude in descending order
            sorted_peak_indices = peak_indices[
                np.argsort(fft_magnitudes[peak_indices])[::-1]
            ]
            top_indices = sorted_peak_indices[:max_top_k]
        else:
            sorted_indices = np.argsort(fft_magnitudes)[::-1]
            if threshold_value is not None:
                # Filter with a boolean mask so the result stays an ndarray;
                # a Python list here breaks the mask/fancy indexing below.
                sorted_indices = sorted_indices[
                    fft_magnitudes[sorted_indices] >= threshold_value
                ]
            top_indices = sorted_indices[:max_top_k]

        # Convert frequencies to periods (avoiding division by zero)
        periods = np.zeros_like(freqs)
        non_zero = freqs > 0
        periods[non_zero] = 1.0 / freqs[non_zero]
        top_periods = periods[top_indices]

        logger.debug("Top periods: %s", top_periods)

        # Optionally round the periods to the nearest integer
        if round_to_closest_integer:
            top_periods = np.round(top_periods)

        # Filter out zero periods if requested
        if exclude_zero:
            non_zero_mask = top_periods != 0
            top_periods = top_periods[non_zero_mask]
            top_indices = top_indices[non_zero_mask]

        # Keep unique periods only (np.unique keeps the first occurrence, i.e.
        # the strongest one, because candidates are ordered by magnitude)
        if len(top_periods) > 0:
            unique_period_indices = np.unique(top_periods, return_index=True)[1]
            top_periods = top_periods[unique_period_indices]
            top_indices = top_indices[unique_period_indices]

        # Pair each period with its corresponding magnitude
        results = list(zip(top_periods, fft_magnitudes[top_indices]))

        # Validate with ACF if requested and filter the results accordingly
        if validate_with_acf:
            # Compute ACF on the NaN-free, non-padded, detrended signal
            acf_values = acf(
                acf_signal,
                nlags=N_original,
                fft=True,
            )
            acf_peak_indices, _ = find_peaks(
                acf_values, height=1.96 / np.sqrt(N_original)
            )
            validated_results = []
            for period, mag in results:
                period_int = int(round(period))
                if period_int < len(acf_values) and any(
                    abs(period_int - peak) <= 1 for peak in acf_peak_indices
                ):
                    validated_results.append((period, mag))
            if validated_results:
                results = validated_results

        # Ensure the final results are sorted in descending order by magnitude
        results.sort(key=lambda x: x[1], reverse=True)

        return results
285
+
286
+
287
def detrend(
    x: np.ndarray,
    detrend_type: Literal["first_diff", "loess", "linear", "constant"],
) -> np.ndarray:
    """Remove the trend from a 1-D signal.

    Parameters:
    - x: np.ndarray
        Input signal.
    - detrend_type: str
        "first_diff": first differences, with the first value prepended so the
            output keeps the input length (its first element is therefore 0).
        "loess": subtract a LOWESS-smoothed trend (statsmodels, frac=0.1).
        "linear" / "constant": delegate to ``scipy.signal.detrend``.

    Returns:
    - np.ndarray: the detrended signal.

    Raises:
    - ValueError: if ``detrend_type`` is not one of the supported methods.
    """
    if detrend_type == "first_diff":
        # x[0] does not exist for an empty signal; there is nothing to difference
        if len(x) == 0:
            return np.array(x)
        return np.diff(x, prepend=x[0])

    elif detrend_type == "loess":
        from statsmodels.api import nonparametric

        indices = np.arange(len(x))
        lowess = nonparametric.lowess(x, indices, frac=0.1)
        trend = lowess[:, 1]
        return x - trend

    elif detrend_type in ["linear", "constant"]:
        from scipy.signal import detrend as scipy_detrend

        return scipy_detrend(x, type=detrend_type)

    else:
        raise ValueError(f"Invalid detrend method: {detrend_type}")
@@ -0,0 +1,88 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from typing import List, Dict, Optional
4
+
5
+ import gluonts.time_feature
6
+
7
+ from tabpfn_time_series.features.feature_generator_base import (
8
+ FeatureGenerator,
9
+ )
10
+
11
+
12
class RunningIndexFeature(FeatureGenerator):
    """Feature generator adding a ``running_index`` column that counts rows from 0."""

    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with ``running_index`` set to 0..len(df)-1."""
        result = df.copy()
        row_count = len(result)
        result["running_index"] = range(row_count)
        return result
17
+
18
+
19
class CalendarFeature(FeatureGenerator):
    """Add calendar components and sin/cos-encoded seasonal calendar features.

    Features are derived from the ``timestamp`` level of the dataframe index.
    """

    def __init__(
        self,
        components: Optional[List[str]] = None,
        seasonal_features: Optional[Dict[str, List[float]]] = None,
    ):
        """
        Parameters:
        - components: attribute names read directly from the timestamp index
            (defaults to ["year"] when omitted or empty).
        - seasonal_features: maps a feature name ``X`` (resolved to
            ``gluonts.time_feature.X_index``) to a list of periods, or to None
            to store the raw index values. Defaults apply when omitted or empty.
        """
        self.components = components or ["year"]
        self.seasonal_features = seasonal_features or {
            # (feature, natural seasonality)
            "second_of_minute": [60],
            "minute_of_hour": [60],
            "hour_of_day": [24],
            "day_of_week": [7],
            "day_of_month": [30.5],
            "day_of_year": [365],
            "week_of_year": [52],
            "month_of_year": [12],
        }

    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with the configured calendar columns added.

        For a feature with a single period the columns are ``<feature>_sin``
        and ``<feature>_cos``. When several periods are configured for the same
        feature, the period is appended (``<feature>_sin_<period>``) so the
        columns of one period no longer overwrite those of another.
        """
        df = df.copy()
        timestamps = df.index.get_level_values("timestamp")

        # Add basic calendar components
        for component in self.components:
            df[component] = getattr(timestamps, component)

        # Add seasonal features
        for feature_name, periods in self.seasonal_features.items():
            feature_func = getattr(gluonts.time_feature, f"{feature_name}_index")
            feature = feature_func(timestamps).astype(np.int32)

            if periods is None:
                df[feature_name] = feature
                continue

            # Only disambiguate names when it is actually needed, so the
            # single-period (default) column names stay unchanged.
            needs_suffix = len(periods) > 1
            for period in periods:
                suffix = f"_{period}" if needs_suffix else ""
                # Adjust for 0-based indexing.
                # NOTE(review): a configured period of 1 divides by zero here.
                adjusted_period = period - 1
                angle = 2 * np.pi * feature / adjusted_period
                df[f"{feature_name}_sin{suffix}"] = np.sin(angle)
                df[f"{feature_name}_cos{suffix}"] = np.cos(angle)

        return df
60
+
61
+
62
class AdditionalCalendarFeature(CalendarFeature):
    """``CalendarFeature`` with extra seasonal features on top of the defaults."""

    def __init__(
        self,
        components: Optional[List[str]] = None,
        additional_seasonal_features: Optional[Dict[str, List[float]]] = None,
    ):
        """
        Parameters:
        - components: forwarded to ``CalendarFeature``.
        - additional_seasonal_features: extra ``{feature_name: periods}``
            entries. May be omitted. If a key also exists in the default
            seasonal features, the default entry wins because it is merged last.
        """
        super().__init__(components=components)

        # The declared default is None, which cannot be unpacked with ``**``
        extra_features = additional_seasonal_features or {}

        self.seasonal_features = {
            **extra_features,
            **self.seasonal_features,
        }
74
+
75
+
76
class PeriodicSinCosineFeature(FeatureGenerator):
    """Add ``sin_*`` / ``cos_*`` columns of the row position for given periods."""

    def __init__(self, periods: List[float], name_suffix: Optional[str] = None):
        """
        Parameters:
        - periods: periods (in rows) to encode.
        - name_suffix: if set (non-empty), columns are named
            ``sin_<name_suffix>_<i>`` / ``cos_<name_suffix>_<i>`` with ``i`` the
            position in ``periods``; otherwise ``sin_<period>`` / ``cos_<period>``.
        """
        self.periods = periods
        self.name_suffix = name_suffix

    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with one sin/cos column pair per period."""
        df = df.copy()

        # Row positions are the same for every period; build them once
        positions = np.arange(len(df))

        for i, period in enumerate(self.periods):
            name_suffix = f"{self.name_suffix}_{i}" if self.name_suffix else f"{period}"
            angle = 2 * np.pi * positions / period
            df[f"sin_{name_suffix}"] = np.sin(angle)
            df[f"cos_{name_suffix}"] = np.cos(angle)

        return df
@@ -0,0 +1,21 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+ import pandas as pd
4
+
5
+
6
class FeatureGenerator(ABC):
    """Abstract base class for feature generators.

    Subclasses implement ``generate``; instances are also callable, which
    simply forwards to ``generate``.
    """

    @abstractmethod
    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Generate features for the given dataframe (implemented by subclasses)."""
        ...

    def __call__(self, df: pd.DataFrame) -> pd.DataFrame:
        # Calling the generator is shorthand for ``generate``
        features = self.generate(df)
        return features

    def __str__(self) -> str:
        # "<ClassName>_<instance attribute dict>"
        class_name = type(self).__name__
        attributes = vars(self)
        return "{}_{}".format(class_name, attributes)

    def __repr__(self) -> str:
        return str(self)
@@ -0,0 +1,53 @@
1
+ from typing import List, Tuple
2
+
3
+ import pandas as pd
4
+
5
+ from autogluon.timeseries import TimeSeriesDataFrame
6
+ from tabpfn_time_series.features.feature_generator_base import (
7
+ FeatureGenerator,
8
+ )
9
+
10
+
11
class FeatureTransformer:
    """Apply a sequence of feature generators to train and test time series."""

    def __init__(self, feature_generators: List[FeatureGenerator]):
        """Store the generators; they are applied in list order by ``transform``."""
        self.feature_generators = feature_generators

    def transform(
        self,
        train_tsdf: TimeSeriesDataFrame,
        test_tsdf: TimeSeriesDataFrame,
        target_column: str = "target",
    ) -> Tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]:
        """Transform both train and test data with the configured feature generators

        Train and test rows are concatenated, each generator is applied per
        ``item_id`` group, and the result is split back by position.

        Raises:
        - ValueError: if ``target_column`` is missing from either input or the
            test data contains non-NaN target values.
        """

        self._validate_input(train_tsdf, test_tsdf, target_column)

        # Remember the split point before the input names are rebound below
        num_train_rows = len(train_tsdf)
        tsdf = pd.concat([train_tsdf, test_tsdf])

        # Apply all feature generators
        for generator in self.feature_generators:
            tsdf = tsdf.groupby(level="item_id", group_keys=False).apply(generator)

        # Split train and test tsdf. This positional split relies on the rows
        # still being in concatenation order; the NaN checks below guard that.
        train_tsdf = tsdf.iloc[:num_train_rows]
        test_tsdf = tsdf.iloc[num_train_rows:]

        assert (
            not train_tsdf[target_column].isna().any()
        ), "All target values in train_tsdf should be non-NaN"
        assert (
            test_tsdf[target_column].isna().all()
        ), "All target values in test_tsdf should be NaN"

        return train_tsdf, test_tsdf

    @staticmethod
    def _validate_input(
        train_tsdf: TimeSeriesDataFrame,
        test_tsdf: TimeSeriesDataFrame,
        target_column: str,
    ):
        """Check that both inputs have ``target_column`` and that test targets are all NaN."""
        if target_column not in train_tsdf.columns:
            raise ValueError(
                f"Target column '{target_column}' not found in training data"
            )

        # Report a missing test column explicitly instead of a bare KeyError
        if target_column not in test_tsdf.columns:
            raise ValueError(f"Target column '{target_column}' not found in test data")

        if not test_tsdf[target_column].isna().all():
            raise ValueError("Test data should not contain target values")
@@ -3,11 +3,8 @@ from enum import Enum
3
3
 
4
4
  from autogluon.timeseries import TimeSeriesDataFrame
5
5
 
6
- from tabpfn_time_series.tabpfn_worker import TabPFNClient, LocalTabPFN
7
- from tabpfn_time_series.defaults import (
8
- TABPFN_TS_DEFAULT_QUANTILE_CONFIG,
9
- TABPFN_TS_DEFAULT_CONFIG,
10
- )
6
+ from tabpfn_time_series.tabpfn_worker import TabPFNClient, LocalTabPFN, MockTabPFN
7
+ from tabpfn_time_series.defaults import TABPFN_TS_DEFAULT_CONFIG
11
8
 
12
9
  logger = logging.getLogger(__name__)
13
10
 
@@ -15,6 +12,7 @@ logger = logging.getLogger(__name__)
15
12
  class TabPFNMode(Enum):
16
13
  LOCAL = "tabpfn-local"
17
14
  CLIENT = "tabpfn-client"
15
+ MOCK = "tabpfn-mock"
18
16
 
19
17
 
20
18
  class TabPFNTimeSeriesPredictor:
@@ -30,6 +28,7 @@ class TabPFNTimeSeriesPredictor:
30
28
  worker_mapping = {
31
29
  TabPFNMode.CLIENT: lambda: TabPFNClient(config),
32
30
  TabPFNMode.LOCAL: lambda: LocalTabPFN(config),
31
+ TabPFNMode.MOCK: lambda: MockTabPFN(config),
33
32
  }
34
33
  self.tabpfn_worker = worker_mapping[tabpfn_mode]()
35
34
 
@@ -37,7 +36,6 @@ class TabPFNTimeSeriesPredictor:
37
36
  self,
38
37
  train_tsdf: TimeSeriesDataFrame, # with features and target
39
38
  test_tsdf: TimeSeriesDataFrame, # with features only
40
- quantile_config: list[float] = TABPFN_TS_DEFAULT_QUANTILE_CONFIG,
41
39
  ) -> TimeSeriesDataFrame:
42
40
  """
43
41
  Predict on each time series individually (local forecasting).
@@ -47,4 +45,4 @@ class TabPFNTimeSeriesPredictor:
47
45
  f"Predicting {len(train_tsdf.item_ids)} time series with config{self.tabpfn_worker.config}"
48
46
  )
49
47
 
50
- return self.tabpfn_worker.predict(train_tsdf, test_tsdf, quantile_config)
48
+ return self.tabpfn_worker.predict(train_tsdf, test_tsdf)
@@ -2,8 +2,10 @@ import logging
2
2
  from abc import ABC, abstractmethod
3
3
  from joblib import Parallel, delayed
4
4
 
5
+ from tqdm import tqdm
5
6
  import pandas as pd
6
7
  import numpy as np
8
+ import torch
7
9
  from scipy.stats import norm
8
10
  from autogluon.timeseries import TimeSeriesDataFrame
9
11
 
@@ -26,14 +28,7 @@ class TabPFNWorker(ABC):
26
28
  self,
27
29
  train_tsdf: TimeSeriesDataFrame,
28
30
  test_tsdf: TimeSeriesDataFrame,
29
- quantile_config: list[float],
30
31
  ):
31
- if not set(quantile_config).issubset(set(TABPFN_TS_DEFAULT_QUANTILE_CONFIG)):
32
- raise NotImplementedError(
33
- f"We currently only supports {TABPFN_TS_DEFAULT_QUANTILE_CONFIG} for quantile prediction,"
34
- f" but got {quantile_config}."
35
- )
36
-
37
32
  predictions = Parallel(
38
33
  n_jobs=self.num_workers,
39
34
  backend="loky",
@@ -42,9 +37,8 @@ class TabPFNWorker(ABC):
42
37
  item_id,
43
38
  train_tsdf.loc[item_id],
44
39
  test_tsdf.loc[item_id],
45
- quantile_config,
46
40
  )
47
- for item_id in train_tsdf.item_ids
41
+ for item_id in tqdm(train_tsdf.item_ids, desc="Predicting time series")
48
42
  )
49
43
 
50
44
  predictions = pd.concat(predictions)
@@ -59,8 +53,9 @@ class TabPFNWorker(ABC):
59
53
  item_id: str,
60
54
  single_train_tsdf: TimeSeriesDataFrame,
61
55
  single_test_tsdf: TimeSeriesDataFrame,
62
- quantile_config: list[float],
63
56
  ) -> pd.DataFrame:
57
+ # logger.debug(f"Predicting on item_id: {item_id}")
58
+
64
59
  test_index = single_test_tsdf.index
65
60
  train_X, train_y = split_time_series_to_X_y(single_train_tsdf.copy())
66
61
  test_X, _ = split_time_series_to_X_y(single_test_tsdf.copy())
@@ -70,7 +65,7 @@ class TabPFNWorker(ABC):
70
65
  if train_y_has_constant_value:
71
66
  logger.info("Found time-series with constant target")
72
67
  result = self._predict_on_constant_train_target(
73
- single_train_tsdf, single_test_tsdf, quantile_config
68
+ single_train_tsdf, single_test_tsdf
74
69
  )
75
70
  else:
76
71
  tabpfn = self._get_tabpfn_engine()
@@ -81,7 +76,9 @@ class TabPFNWorker(ABC):
81
76
  result.update(
82
77
  {
83
78
  q: q_pred
84
- for q, q_pred in zip(quantile_config, full_pred["quantiles"])
79
+ for q, q_pred in zip(
80
+ TABPFN_TS_DEFAULT_QUANTILE_CONFIG, full_pred["quantiles"]
81
+ )
85
82
  }
86
83
  )
87
84
 
@@ -98,7 +95,6 @@ class TabPFNWorker(ABC):
98
95
  self,
99
96
  single_train_tsdf: TimeSeriesDataFrame,
100
97
  single_test_tsdf: TimeSeriesDataFrame,
101
- quantile_config: list[float],
102
98
  ) -> pd.DataFrame:
103
99
  # If train_y is constant, we return the constant value from the training set
104
100
  mean_constant = single_train_tsdf.target.iloc[0]
@@ -106,12 +102,14 @@ class TabPFNWorker(ABC):
106
102
 
107
103
  # For quantile prediction, we assume that the uncertainty follows a standard normal distribution
108
104
  quantile_pred_with_uncertainty = norm.ppf(
109
- quantile_config, loc=mean_constant, scale=1
105
+ TABPFN_TS_DEFAULT_QUANTILE_CONFIG, loc=mean_constant, scale=1
110
106
  )
111
107
  result.update(
112
108
  {
113
109
  q: np.full(len(single_test_tsdf), v)
114
- for q, v in zip(quantile_config, quantile_pred_with_uncertainty)
110
+ for q, v in zip(
111
+ TABPFN_TS_DEFAULT_QUANTILE_CONFIG, quantile_pred_with_uncertainty
112
+ )
115
113
  }
116
114
  )
117
115
 
@@ -141,8 +139,52 @@ class LocalTabPFN(TabPFNWorker):
141
139
  def __init__(
142
140
  self,
143
141
  config: dict = {},
142
+ num_workers_per_gpu: int = 4, # per GPU
143
+ ):
144
+ self.num_workers_per_gpu = num_workers_per_gpu
145
+
146
+ # Only support GPU for now (inference on CPU takes too long)
147
+ if not torch.cuda.is_available():
148
+ raise ValueError("GPU is required for local TabPFN inference")
149
+
150
+ super().__init__(
151
+ config, num_workers=torch.cuda.device_count() * self.num_workers_per_gpu
152
+ )
153
+
154
+ def predict(
155
+ self,
156
+ train_tsdf: TimeSeriesDataFrame,
157
+ test_tsdf: TimeSeriesDataFrame,
144
158
  ):
145
- super().__init__(config, num_workers=1)
159
+ total_num_workers = torch.cuda.device_count() * self.num_workers_per_gpu
160
+
161
+ # Split data into chunks for parallel inference on each GPU
162
+ # since the time series are of different lengths, we shuffle
163
+ # the item_ids s.t. the workload is distributed evenly across GPUs
164
+ # Also, using 'min' since num_workers could be larger than the number of time series
165
+ np.random.seed(0)
166
+ item_ids_chunks = np.array_split(
167
+ np.random.permutation(train_tsdf.item_ids),
168
+ min(total_num_workers, len(train_tsdf.item_ids)),
169
+ )
170
+
171
+ # Run predictions in parallel
172
+ predictions = Parallel(n_jobs=len(item_ids_chunks), backend="loky")(
173
+ delayed(self._prediction_routine_per_gpu)(
174
+ train_tsdf.loc[chunk],
175
+ test_tsdf.loc[chunk],
176
+ gpu_id=i
177
+ % torch.cuda.device_count(), # Alternate between available GPUs
178
+ )
179
+ for i, chunk in enumerate(item_ids_chunks)
180
+ )
181
+
182
+ predictions = pd.concat(predictions)
183
+
184
+ # Sort predictions according to original item_ids order
185
+ predictions = predictions.loc[train_tsdf.item_ids]
186
+
187
+ return TimeSeriesDataFrame(predictions)
146
188
 
147
189
  def _get_tabpfn_engine(self):
148
190
  from tabpfn import TabPFNRegressor
@@ -151,7 +193,67 @@ class LocalTabPFN(TabPFNWorker):
151
193
  config = self.config["tabpfn_internal"].copy()
152
194
  config["model_path"] = self._parse_model_path(config["model_path"])
153
195
 
154
- return TabPFNRegressor(**config)
196
+ return TabPFNRegressor(**config, random_state=0)
155
197
 
156
198
  def _parse_model_path(self, model_name: str) -> str:
157
199
  return f"tabpfn-v2-regressor-{model_name}.ckpt"
200
+
201
def _prediction_routine_per_gpu(
    self,
    train_tsdf: TimeSeriesDataFrame,
    test_tsdf: TimeSeriesDataFrame,
    gpu_id: int,
):
    """Predict every series in ``train_tsdf`` sequentially on CUDA device ``gpu_id``.

    Returns the per-item results of ``self._prediction_routine`` concatenated
    with ``pd.concat``.
    """
    # Set GPU
    torch.cuda.set_device(gpu_id)

    progress = tqdm(train_tsdf.item_ids, desc=f"GPU {gpu_id}:")
    per_item_predictions = [
        self._prediction_routine(
            item_id,
            train_tsdf.loc[item_id],
            test_tsdf.loc[item_id],
        )
        for item_id in progress
    ]

    # Clear GPU cache
    torch.cuda.empty_cache()

    return pd.concat(per_item_predictions)
223
+
224
+
225
class MockTabPFN(TabPFNWorker):
    """
    Mock TabPFN worker that returns random values for predictions.
    Can be used for testing or debugging.
    """

    class MockTabPFNRegressor:
        """Stand-in regressor: ``fit`` is a no-op and ``predict`` returns random arrays."""

        TABPFN_QUANTILE = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

        def __init__(self, *args, **kwargs):
            # Accepts and ignores any arguments
            pass

        def fit(self, *args, **kwargs):
            # Nothing to fit
            pass

        def predict(self, test_X, output_type="main", **kwargs):
            """Return random "mean", "median", "mode" and "quantiles" of length ``len(test_X)``.

            Raises NotImplementedError for any ``output_type`` other than "main".
            """
            if output_type == "main":
                n_rows = len(test_X)
                # Draw in the order mean, median, mode, quantiles
                mean = np.random.rand(n_rows)
                median = np.random.rand(n_rows)
                mode = np.random.rand(n_rows)
                quantiles = [np.random.rand(n_rows) for _ in self.TABPFN_QUANTILE]
                return {
                    "mean": mean,
                    "median": median,
                    "mode": mode,
                    "quantiles": quantiles,
                }

            raise NotImplementedError("Only main output is supported for mock TabPFN")

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _get_tabpfn_engine(self):
        # Fresh mock regressor on every call
        engine = self.MockTabPFNRegressor()
        return engine
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tabpfn_time_series
3
- Version: 0.1.2
4
- Summary: Zero-shot time series forecasting with TabPFN
3
+ Version: 1.0.0
4
+ Summary: Zero-shot time series forecasting with TabPFNv2
5
5
  Project-URL: Homepage, https://github.com/liam-sbhoo/tabpfn-time-series
6
6
  Project-URL: Bug Tracker, https://github.com/liam-sbhoo/tabpfn-time-series/issues
7
7
  Author-email: Liam Shi Bin Hoo <hoos@tf.uni-freiburg.de>
@@ -10,28 +10,43 @@ Classifier: License :: OSI Approved :: Apache Software License
10
10
  Classifier: Operating System :: OS Independent
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Requires-Python: >=3.10
13
- Requires-Dist: autogluon-timeseries==1.2
14
- Requires-Dist: gluonts==0.16.0
15
- Requires-Dist: pandas
16
- Requires-Dist: tabpfn-client==0.1.1
17
- Requires-Dist: tabpfn==2.0.0
13
+ Requires-Dist: autogluon-timeseries>=1.2
14
+ Requires-Dist: datasets>=3.3.2
15
+ Requires-Dist: gluonts>=0.16.0
16
+ Requires-Dist: pandas<2.2.0,>=2.1.2
17
+ Requires-Dist: python-dotenv>=1.1.0
18
+ Requires-Dist: pyyaml>=6.0.1
19
+ Requires-Dist: tabpfn-client>=0.1.7
20
+ Requires-Dist: tabpfn>=2.0.9
18
21
  Requires-Dist: tqdm
19
22
  Provides-Extra: dev
20
23
  Requires-Dist: build; extra == 'dev'
21
24
  Requires-Dist: jupyter; extra == 'dev'
22
25
  Requires-Dist: pre-commit; extra == 'dev'
23
26
  Requires-Dist: ruff; extra == 'dev'
27
+ Requires-Dist: submitit>=1.5.2; extra == 'dev'
24
28
  Requires-Dist: twine; extra == 'dev'
29
+ Requires-Dist: wandb>=0.19.8; extra == 'dev'
25
30
  Description-Content-Type: text/markdown
26
31
 
27
- # Time Series Forecasting with TabPFN
32
+ # TabPFN-TS
28
33
 
34
+ > Zero-Shot Time Series Forecasting with TabPFNv2
35
+
36
+ [![PyPI version](https://badge.fury.io/py/tabpfn-time-series.svg)](https://badge.fury.io/py/tabpfn-time-series)
29
37
  [![colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liam-sbhoo/tabpfn-time-series/blob/main/demo.ipynb)
30
38
  [![Discord](https://img.shields.io/discord/1285598202732482621?color=7289da&label=Discord&logo=discord&logoColor=ffffff)](https://discord.com/channels/1285598202732482621/)
31
- [![arXiv](https://img.shields.io/badge/arXiv-2501.02945-<COLOR>.svg)](https://arxiv.org/abs/2501.02945)
39
+ [![arXiv](https://img.shields.io/badge/arXiv-2501.02945-<COLOR>.svg)](https://arxiv.org/abs/2501.02945v3)
40
+
41
+ ## 📌 News
42
+ - **27-05-2025**: 📝 New **[paper](https://arxiv.org/abs/2501.02945v3)** version and **v1.0.0** release! Strong [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) results, new AutoSeasonalFeatures, improved CalendarFeatures.
43
+ - **27-01-2025**: 🚀 Ranked _**1st**_ on [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark<sup>[1]</sup>!
44
+ - **10-10-2024**: 🚀 TabPFN-TS [paper](https://arxiv.org/abs/2501.02945v2) accepted to NeurIPS 2024 [TRL](https://table-representation-learning.github.io/NeurIPS2024/) and [TSALM](https://neurips-time-series-workshop.github.io/) workshops!
32
45
 
46
+ _[1] Last checked on: 10/03/2025_
33
47
 
34
- We demonstrate that the tabular foundation model **[TabPFN](https://github.com/PriorLabs/TabPFN)**, when paired with minimal featurization, can perform zero-shot time series forecasting. Its performance on point forecasting matches or even slightly outperforms state-of-the-art methods.
48
+ ## Introduction
49
+ We demonstrate that the tabular foundation model **[TabPFNv2](https://github.com/PriorLabs/TabPFN)**, combined with lightweight feature engineering, enables zero-shot time series forecasting for both point and probabilistic tasks. On the **[GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval)** benchmark, our method achieves performance on par with top-tier models across both evaluation metrics.
35
50
 
36
51
  ## 📖 How does it work?
37
52
 
@@ -41,18 +56,19 @@ Our work proposes to frame **univariate time series forecasting** as a **tabular
41
56
 
42
57
  Concretely, we:
43
58
  1. Transform a time series into a table
44
- 2. Extract features from timestamp and add them to the table
45
- 3. Perform regression on the table using TabPFN
59
+ 2. Extract features and add them to the table
60
+ 3. Perform regression on the table using TabPFNv2
46
61
  4. Use regression results as time series forecasting outputs
47
62
 
48
- For more details, please refer to our [paper](https://arxiv.org/abs/2501.02945) and our [poster](docs/tabpfn-ts-neurips-poster.pdf) (presented at NeurIPS 2024 TRL and TSALM workshops).
63
+ For more details, please refer to our [paper](https://arxiv.org/abs/2501.02945v3).
64
+ <!-- and our [poster](docs/tabpfn-ts-neurips-poster.pdf) (presented at NeurIPS 2024 TRL and TSALM workshops). -->
49
65
 
50
66
  ## 👉 **Why give us a try?**
51
67
  - **Zero-shot forecasting**: this method is extremely fast and requires no training, making it highly accessible for experimenting with your own problems.
52
68
  - **Point and probabilistic forecasting**: it provides accurate point forecasts as well as probabilistic forecasts.
53
69
  - **Support for exogenous variables**: if you have exogenous variables, this method can seamlessly incorporate them into the forecasting model.
54
70
 
55
- On top of that, thanks to **[tabpfn-client](https://github.com/automl/tabpfn-client)** from **[Prior Labs](https://priorlabs.ai)**, you won’t even need your own GPU to run fast inference with TabPFN. 😉 We have included `tabpfn-client` as the default engine in our implementation.
71
+ On top of that, thanks to **[tabpfn-client](https://github.com/automl/tabpfn-client)** from **[Prior Labs](https://priorlabs.ai)**, you won’t even need your own GPU to run fast inference with TabPFNv2. 😉 We have included `tabpfn-client` as the default engine in our implementation.
56
72
 
57
73
  ## How to use it?
58
74
 
@@ -0,0 +1,15 @@
1
+ tabpfn_time_series/__init__.py,sha256=3XGvQieVbONwhVtn1rITet6HNiTMWQTxHm2xLlGI5ew,314
2
+ tabpfn_time_series/data_preparation.py,sha256=iNW7sAnRkTgmzzOEHBhkkTwm_lQ3p_Q9xgAQ5PbkOts,5416
3
+ tabpfn_time_series/defaults.py,sha256=u2_JnwxiZ5NNibzyNpsE63KuP3TcmOL1iAP8llZ2rJk,238
4
+ tabpfn_time_series/plot.py,sha256=bwSYcWBanzPrUxXKFsbqG8fyGsOJZfgU2v3NsxzTSXo,6571
5
+ tabpfn_time_series/predictor.py,sha256=JzuV34zERf1XDLacGzSFJb-o077qd7GlKC6lvD62EPk,1457
6
+ tabpfn_time_series/tabpfn_worker.py,sha256=zvFwg4Dc01_m5emqmVITBr6W_cNZ04tMyntmj40pyPE,8299
7
+ tabpfn_time_series/features/__init__.py,sha256=lzdZWkEfntfg3ZHqNNbfbg-3o_VIzju0tebdRu3AzF4,421
8
+ tabpfn_time_series/features/auto_features.py,sha256=3OqqY2h7umcoLjLx4hOXypLTjwzrMtd6cQKTNi83vrU,11561
9
+ tabpfn_time_series/features/basic_features.py,sha256=OV3B__S30-CX88vGjwYQDWqAbJajQw80PxcnvJVUbm4,2955
10
+ tabpfn_time_series/features/feature_generator_base.py,sha256=jtySWLJyX4E31v6CbX44EHa8cdz7OMyauf4ltNEQeAQ,534
11
+ tabpfn_time_series/features/feature_transformer.py,sha256=mUsbnPUhJ4lPcnGWk8Ag1hgCOE1V5I0iQRT4VFgQEso,1763
12
+ tabpfn_time_series-1.0.0.dist-info/METADATA,sha256=CvXqIOHNTKyd-zpCednsqa3FloPk6lFJ4ISG0eSEWx4,4434
13
+ tabpfn_time_series-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
+ tabpfn_time_series-1.0.0.dist-info/licenses/LICENSE.txt,sha256=iwhPL7kIWQG6gyLZZwIMDItGrNgxMDIq9itxkUSMapY,11345
15
+ tabpfn_time_series-1.0.0.dist-info/RECORD,,
@@ -1,78 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
- from typing import Tuple, List, Callable
4
-
5
- import gluonts.time_feature
6
- from autogluon.timeseries import TimeSeriesDataFrame
7
-
8
-
9
- class DefaultFeatures:
10
- @staticmethod
11
- def add_running_index(df: pd.DataFrame) -> pd.Series:
12
- df["running_index"] = range(len(df))
13
- return df
14
-
15
- @staticmethod
16
- def add_calendar_features(df: pd.DataFrame) -> pd.DataFrame:
17
- CALENDAR_COMPONENT = [
18
- "year",
19
- # "month",
20
- # "day",
21
- ]
22
-
23
- CALENDAR_FEATURES = [
24
- # (feature, natural seasonality)
25
- ("hour_of_day", 24),
26
- ("day_of_week", 7),
27
- ("day_of_month", 30.5),
28
- ("day_of_year", 365),
29
- ("week_of_year", 52),
30
- ("month_of_year", 12),
31
- ]
32
-
33
- timestamps = df.index.get_level_values("timestamp")
34
-
35
- for component_name in CALENDAR_COMPONENT:
36
- df[component_name] = getattr(timestamps, component_name)
37
-
38
- for feature_name, seasonality in CALENDAR_FEATURES:
39
- feature_func = getattr(gluonts.time_feature, f"{feature_name}_index")
40
- feature = feature_func(timestamps).astype(np.int32)
41
- if seasonality is not None:
42
- df[f"{feature_name}_sin"] = np.sin(
43
- 2 * np.pi * feature / (seasonality - 1)
44
- ) # seasonality - 1 because the value starts from 0
45
- df[f"{feature_name}_cos"] = np.cos(
46
- 2 * np.pi * feature / (seasonality - 1)
47
- )
48
- else:
49
- df[feature_name] = feature
50
-
51
- return df
52
-
53
-
54
- class FeatureTransformer:
55
- @staticmethod
56
- def add_features(
57
- train_tsdf: TimeSeriesDataFrame,
58
- test_tsdf: TimeSeriesDataFrame,
59
- feature_generators: List[Callable[[TimeSeriesDataFrame], TimeSeriesDataFrame]],
60
- target_column: str = "target",
61
- ) -> Tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]:
62
- assert target_column in train_tsdf.columns
63
- assert test_tsdf[target_column].isna().all()
64
-
65
- # Join train and test tsdf
66
- tsdf = pd.concat([train_tsdf, test_tsdf])
67
-
68
- # Apply feature generators
69
- for func in feature_generators:
70
- tsdf = tsdf.groupby(level="item_id", group_keys=False).apply(func)
71
-
72
- # Split train and test tsdf
73
- train_tsdf = tsdf.iloc[: len(train_tsdf)]
74
- test_tsdf = tsdf.iloc[len(train_tsdf) :]
75
-
76
- assert test_tsdf[target_column].isna().all()
77
-
78
- return train_tsdf, test_tsdf
@@ -1,11 +0,0 @@
1
- tabpfn_time_series/__init__.py,sha256=5ruHrmKBQRIZ3WXLA8du4JKttF55ntnI74hkRsHThQ8,256
2
- tabpfn_time_series/data_preparation.py,sha256=iNW7sAnRkTgmzzOEHBhkkTwm_lQ3p_Q9xgAQ5PbkOts,5416
3
- tabpfn_time_series/defaults.py,sha256=u2_JnwxiZ5NNibzyNpsE63KuP3TcmOL1iAP8llZ2rJk,238
4
- tabpfn_time_series/feature.py,sha256=_9FxfQfgPOOO1MiT8hB8523eZ3Nc5oKuoY7vcohKZZc,2531
5
- tabpfn_time_series/plot.py,sha256=bwSYcWBanzPrUxXKFsbqG8fyGsOJZfgU2v3NsxzTSXo,6571
6
- tabpfn_time_series/predictor.py,sha256=W9JijaxFaR0chfiW7m4RuDQ0wrRcJezDWVwCBEOQDFk,1502
7
- tabpfn_time_series/tabpfn_worker.py,sha256=XNpqLEW51PgzrEopNNdtGdYArMCHT4yeBK3BS3z25K0,5021
8
- tabpfn_time_series-0.1.2.dist-info/METADATA,sha256=hO69b8GN3GDRIetG4DGtxpdMubc8sm8h_aI2RwEto2U,3285
9
- tabpfn_time_series-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
- tabpfn_time_series-0.1.2.dist-info/licenses/LICENSE.txt,sha256=iwhPL7kIWQG6gyLZZwIMDItGrNgxMDIq9itxkUSMapY,11345
11
- tabpfn_time_series-0.1.2.dist-info/RECORD,,