tabpfn-time-series 0.1.3.tar.gz → 1.0.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/.github/workflows/pull_request.yml +2 -2
  2. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/PKG-INFO +20 -18
  3. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/README.md +12 -14
  4. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/pyproject.toml +2 -2
  5. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/requirements-dev.txt +2 -0
  6. tabpfn_time_series-1.0.0/requirements.txt +11 -0
  7. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/tabpfn_time_series/__init__.py +1 -2
  8. tabpfn_time_series-1.0.0/tabpfn_time_series/features/__init__.py +17 -0
  9. tabpfn_time_series-1.0.0/tabpfn_time_series/features/auto_features.py +307 -0
  10. tabpfn_time_series-1.0.0/tabpfn_time_series/features/basic_features.py +88 -0
  11. tabpfn_time_series-1.0.0/tabpfn_time_series/features/feature_generator_base.py +21 -0
  12. tabpfn_time_series-1.0.0/tabpfn_time_series/features/feature_transformer.py +53 -0
  13. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/tests/test_predictor.py +16 -8
  14. tabpfn_time_series-0.1.3/.cache/huggingface/.gitignore +0 -1
  15. tabpfn_time_series-0.1.3/.cache/huggingface/download/config.json.lock +0 -0
  16. tabpfn_time_series-0.1.3/.cache/huggingface/download/config.json.metadata +0 -3
  17. tabpfn_time_series-0.1.3/.cache/huggingface/download/tabpfn-v2-regressor-2noar4o2.ckpt.lock +0 -0
  18. tabpfn_time_series-0.1.3/.cache/huggingface/download/tabpfn-v2-regressor-2noar4o2.ckpt.metadata +0 -3
  19. tabpfn_time_series-0.1.3/local_notebooks/analysis.ipynb +0 -1193
  20. tabpfn_time_series-0.1.3/local_notebooks/figures/performance_by_dataset_MASE.pdf +0 -0
  21. tabpfn_time_series-0.1.3/local_notebooks/figures/performance_by_domain_MASE.pdf +0 -0
  22. tabpfn_time_series-0.1.3/local_notebooks/figures/performance_by_freq_MASE.pdf +0 -0
  23. tabpfn_time_series-0.1.3/local_notebooks/figures/performance_by_num_variates_MASE.pdf +0 -0
  24. tabpfn_time_series-0.1.3/local_notebooks/trend_column.ipynb +0 -1055
  25. tabpfn_time_series-0.1.3/local_scripts/aggregate_result.py +0 -112
  26. tabpfn_time_series-0.1.3/local_scripts/run_eval_on_slurm.py +0 -111
  27. tabpfn_time_series-0.1.3/playground.ipynb +0 -470
  28. tabpfn_time_series-0.1.3/requirements.txt +0 -7
  29. tabpfn_time_series-0.1.3/tabpfn-v2-regressor-2noar4o2.ckpt +0 -0
  30. tabpfn_time_series-0.1.3/tabpfn_time_series/feature.py +0 -78
  31. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/.gitignore +0 -0
  32. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/.pre-commit-config.yaml +0 -0
  33. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/LICENSE.txt +0 -0
  34. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/demo.ipynb +0 -0
  35. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/ruff.toml +0 -0
  36. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/tabpfn_time_series/data_preparation.py +0 -0
  37. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/tabpfn_time_series/defaults.py +0 -0
  38. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/tabpfn_time_series/plot.py +0 -0
  39. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/tabpfn_time_series/predictor.py +0 -0
  40. {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/tabpfn_time_series/tabpfn_worker.py +0 -0
@@ -27,7 +27,7 @@ jobs:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
-       python-version: ["3.9", "3.10"]
+       python-version: ["3.10", "3.11", "3.12"]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
@@ -56,4 +56,4 @@ jobs:
  env:
    TABPFN_CLIENT_API_KEY: ${{ secrets.TABPFN_CLIENT_API_KEY }}
  run: |
-   python -m unittest discover -s tests -t tests
+   python -m unittest discover -s tests -t tests
@@ -1,7 +1,7 @@
  Metadata-Version: 2.4
  Name: tabpfn_time_series
- Version: 0.1.3
- Summary: Zero-shot time series forecasting with TabPFN
+ Version: 1.0.0
+ Summary: Zero-shot time series forecasting with TabPFNv2
  Project-URL: Homepage, https://github.com/liam-sbhoo/tabpfn-time-series
  Project-URL: Bug Tracker, https://github.com/liam-sbhoo/tabpfn-time-series/issues
  Author-email: Liam Shi Bin Hoo <hoos@tf.uni-freiburg.de>
@@ -14,32 +14,39 @@ Requires-Dist: autogluon-timeseries>=1.2
  Requires-Dist: datasets>=3.3.2
  Requires-Dist: gluonts>=0.16.0
  Requires-Dist: pandas<2.2.0,>=2.1.2
- Requires-Dist: tabpfn-client>=0.1.1
- Requires-Dist: tabpfn>=2.0.0
+ Requires-Dist: python-dotenv>=1.1.0
+ Requires-Dist: pyyaml>=6.0.1
+ Requires-Dist: tabpfn-client>=0.1.7
+ Requires-Dist: tabpfn>=2.0.9
  Requires-Dist: tqdm
  Provides-Extra: dev
  Requires-Dist: build; extra == 'dev'
  Requires-Dist: jupyter; extra == 'dev'
  Requires-Dist: pre-commit; extra == 'dev'
  Requires-Dist: ruff; extra == 'dev'
+ Requires-Dist: submitit>=1.5.2; extra == 'dev'
  Requires-Dist: twine; extra == 'dev'
+ Requires-Dist: wandb>=0.19.8; extra == 'dev'
  Description-Content-Type: text/markdown

- # Zero-Shot Time Series Forecasting with TabPFN
+ # TabPFN-TS
+
+ > Zero-Shot Time Series Forecasting with TabPFNv2

  [![PyPI version](https://badge.fury.io/py/tabpfn-time-series.svg)](https://badge.fury.io/py/tabpfn-time-series)
  [![colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liam-sbhoo/tabpfn-time-series/blob/main/demo.ipynb)
  [![Discord](https://img.shields.io/discord/1285598202732482621?color=7289da&label=Discord&logo=discord&logoColor=ffffff)](https://discord.com/channels/1285598202732482621/)
- [![arXiv](https://img.shields.io/badge/arXiv-2501.02945-<COLOR>.svg)](https://arxiv.org/abs/2501.02945)
+ [![arXiv](https://img.shields.io/badge/arXiv-2501.02945-<COLOR>.svg)](https://arxiv.org/abs/2501.02945v3)

  ## 📌 News
+ - **27-05-2025**: 📝 New **[paper](https://arxiv.org/abs/2501.02945v3)** version and **v1.0.0** release! Strong [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) results, new AutoSeasonalFeatures, improved CalendarFeatures.
  - **27-01-2025**: 🚀 Ranked _**1st**_ on [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark<sup>[1]</sup>!
- - **10-10-2024**: 🚀 TabPFN-TS [paper](https://arxiv.org/abs/2501.02945) accepted to NeurIPS 2024 [TRL](https://table-representation-learning.github.io/NeurIPS2024/) and [TSALM](https://neurips-time-series-workshop.github.io/) workshops!
+ - **10-10-2024**: 🚀 TabPFN-TS [paper](https://arxiv.org/abs/2501.02945v2) accepted to NeurIPS 2024 [TRL](https://table-representation-learning.github.io/NeurIPS2024/) and [TSALM](https://neurips-time-series-workshop.github.io/) workshops!

  _[1] Last checked on: 10/03/2025_

  ## ✨ Introduction
- We demonstrate that the tabular foundation model **[TabPFN](https://github.com/PriorLabs/TabPFN)**, when paired with minimal featurization, can perform zero-shot time series forecasting. Its performance on point forecasting matches or even slightly outperforms state-of-the-art methods.
+ We demonstrate that the tabular foundation model **[TabPFNv2](https://github.com/PriorLabs/TabPFN)**, combined with lightweight feature engineering, enables zero-shot time series forecasting for both point and probabilistic tasks. On the **[GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval)** benchmark, our method achieves performance on par with top-tier models across both evaluation metrics.

  ## 📖 How does it work?

@@ -49,27 +56,22 @@ Our work proposes to frame **univariate time series forecasting** as a **tabular

  Concretely, we:
  1. Transform a time series into a table
- 2. Extract features from timestamp and add them to the table
- 3. Perform regression on the table using TabPFN
+ 2. Extract features and add them to the table
+ 3. Perform regression on the table using TabPFNv2
  4. Use regression results as time series forecasting outputs

- For more details, please refer to our [paper](https://arxiv.org/abs/2501.02945) and our [poster](docs/tabpfn-ts-neurips-poster.pdf) (presented at NeurIPS 2024 TRL and TSALM workshops).
+ For more details, please refer to our [paper](https://arxiv.org/abs/2501.02945v3).
+ <!-- and our [poster](docs/tabpfn-ts-neurips-poster.pdf) (presented at NeurIPS 2024 TRL and TSALM workshops). -->

  ## 👉 **Why gives us a try?**
  - **Zero-shot forecasting**: this method is extremely fast and requires no training, making it highly accessible for experimenting with your own problems.
  - **Point and probabilistic forecasting**: it provides accurate point forecasts as well as probabilistic forecasts.
  - **Support for exogenous variables**: if you have exogenous variables, this method can seemlessly incorporate them into the forecasting model.

- On top of that, thanks to **[tabpfn-client](https://github.com/automl/tabpfn-client)** from **[Prior Labs](https://priorlabs.ai)**, you won’t even need your own GPU to run fast inference with TabPFN. 😉 We have included `tabpfn-client` as the default engine in our implementation.
+ On top of that, thanks to **[tabpfn-client](https://github.com/automl/tabpfn-client)** from **[Prior Labs](https://priorlabs.ai)**, you won’t even need your own GPU to run fast inference with TabPFNv2. 😉 We have included `tabpfn-client` as the default engine in our implementation.

  ## How to use it?

  [![colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liam-sbhoo/tabpfn-time-series/blob/main/demo.ipynb)

  The demo should explain it all. 😉
-
- ## 📊 GIFT-EVAL Benchmark
-
- We have submitted our results to the [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark. Stay tuned for results!
-
- For more details regarding the evaluation setup, please refer to [README.md](gift_eval/README.md).
@@ -1,18 +1,21 @@
- # Zero-Shot Time Series Forecasting with TabPFN
+ # TabPFN-TS
+
+ > Zero-Shot Time Series Forecasting with TabPFNv2

  [![PyPI version](https://badge.fury.io/py/tabpfn-time-series.svg)](https://badge.fury.io/py/tabpfn-time-series)
  [![colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liam-sbhoo/tabpfn-time-series/blob/main/demo.ipynb)
  [![Discord](https://img.shields.io/discord/1285598202732482621?color=7289da&label=Discord&logo=discord&logoColor=ffffff)](https://discord.com/channels/1285598202732482621/)
- [![arXiv](https://img.shields.io/badge/arXiv-2501.02945-<COLOR>.svg)](https://arxiv.org/abs/2501.02945)
+ [![arXiv](https://img.shields.io/badge/arXiv-2501.02945-<COLOR>.svg)](https://arxiv.org/abs/2501.02945v3)

  ## 📌 News
+ - **27-05-2025**: 📝 New **[paper](https://arxiv.org/abs/2501.02945v3)** version and **v1.0.0** release! Strong [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) results, new AutoSeasonalFeatures, improved CalendarFeatures.
  - **27-01-2025**: 🚀 Ranked _**1st**_ on [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark<sup>[1]</sup>!
- - **10-10-2024**: 🚀 TabPFN-TS [paper](https://arxiv.org/abs/2501.02945) accepted to NeurIPS 2024 [TRL](https://table-representation-learning.github.io/NeurIPS2024/) and [TSALM](https://neurips-time-series-workshop.github.io/) workshops!
+ - **10-10-2024**: 🚀 TabPFN-TS [paper](https://arxiv.org/abs/2501.02945v2) accepted to NeurIPS 2024 [TRL](https://table-representation-learning.github.io/NeurIPS2024/) and [TSALM](https://neurips-time-series-workshop.github.io/) workshops!

  _[1] Last checked on: 10/03/2025_

  ## ✨ Introduction
- We demonstrate that the tabular foundation model **[TabPFN](https://github.com/PriorLabs/TabPFN)**, when paired with minimal featurization, can perform zero-shot time series forecasting. Its performance on point forecasting matches or even slightly outperforms state-of-the-art methods.
+ We demonstrate that the tabular foundation model **[TabPFNv2](https://github.com/PriorLabs/TabPFN)**, combined with lightweight feature engineering, enables zero-shot time series forecasting for both point and probabilistic tasks. On the **[GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval)** benchmark, our method achieves performance on par with top-tier models across both evaluation metrics.

  ## 📖 How does it work?

@@ -22,27 +25,22 @@ Our work proposes to frame **univariate time series forecasting** as a **tabular

  Concretely, we:
  1. Transform a time series into a table
- 2. Extract features from timestamp and add them to the table
- 3. Perform regression on the table using TabPFN
+ 2. Extract features and add them to the table
+ 3. Perform regression on the table using TabPFNv2
  4. Use regression results as time series forecasting outputs

- For more details, please refer to our [paper](https://arxiv.org/abs/2501.02945) and our [poster](docs/tabpfn-ts-neurips-poster.pdf) (presented at NeurIPS 2024 TRL and TSALM workshops).
+ For more details, please refer to our [paper](https://arxiv.org/abs/2501.02945v3).
+ <!-- and our [poster](docs/tabpfn-ts-neurips-poster.pdf) (presented at NeurIPS 2024 TRL and TSALM workshops). -->

  ## 👉 **Why gives us a try?**
  - **Zero-shot forecasting**: this method is extremely fast and requires no training, making it highly accessible for experimenting with your own problems.
  - **Point and probabilistic forecasting**: it provides accurate point forecasts as well as probabilistic forecasts.
  - **Support for exogenous variables**: if you have exogenous variables, this method can seemlessly incorporate them into the forecasting model.

- On top of that, thanks to **[tabpfn-client](https://github.com/automl/tabpfn-client)** from **[Prior Labs](https://priorlabs.ai)**, you won’t even need your own GPU to run fast inference with TabPFN. 😉 We have included `tabpfn-client` as the default engine in our implementation.
+ On top of that, thanks to **[tabpfn-client](https://github.com/automl/tabpfn-client)** from **[Prior Labs](https://priorlabs.ai)**, you won’t even need your own GPU to run fast inference with TabPFNv2. 😉 We have included `tabpfn-client` as the default engine in our implementation.

  ## How to use it?

  [![colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liam-sbhoo/tabpfn-time-series/blob/main/demo.ipynb)

  The demo should explain it all. 😉
-
- ## 📊 GIFT-EVAL Benchmark
-
- We have submitted our results to the [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark. Stay tuned for results!
-
- For more details regarding the evaluation setup, please refer to [README.md](gift_eval/README.md).
@@ -4,11 +4,11 @@ build-backend = "hatchling.build"

  [project]
  name = "tabpfn_time_series"
- version = "0.1.3"
+ version = "1.0.0"
  authors = [
      { name="Liam Shi Bin Hoo", email="hoos@tf.uni-freiburg.de" },
  ]
- description = "Zero-shot time series forecasting with TabPFN"
+ description = "Zero-shot time series forecasting with TabPFNv2"
  readme = "README.md"
  requires-python = ">=3.10"
  classifiers = [
@@ -1,5 +1,7 @@
  # for development
  jupyter
+ wandb>=0.19.8
+ submitit>=1.5.2
  ruff
  pre-commit
  twine
@@ -0,0 +1,11 @@
+ tqdm
+ pandas>=2.1.2, <2.2.0
+ gluonts>=0.16.0
+ autogluon.timeseries>=1.2
+ tabpfn-client>=0.1.7
+ tabpfn>=2.0.9
+ datasets>=3.3.2
+ python-dotenv>=1.1.0
+
+ # for python 3.12
+ pyyaml>=6.0.1
@@ -1,11 +1,10 @@
- from .feature import DefaultFeatures, FeatureTransformer
+ from .features import FeatureTransformer
  from .predictor import TabPFNTimeSeriesPredictor, TabPFNMode
  from .defaults import TABPFN_TS_DEFAULT_QUANTILE_CONFIG

  __version__ = "0.1.0"

  __all__ = [
-     "DefaultFeatures",
      "FeatureTransformer",
      "TabPFNTimeSeriesPredictor",
      "TabPFNMode",
@@ -0,0 +1,17 @@
+ from .basic_features import (
+     RunningIndexFeature,
+     CalendarFeature,
+     AdditionalCalendarFeature,
+     PeriodicSinCosineFeature,
+ )
+ from .auto_features import AutoSeasonalFeature
+ from .feature_transformer import FeatureTransformer
+
+ __all__ = [
+     "RunningIndexFeature",
+     "CalendarFeature",
+     "AdditionalCalendarFeature",
+     "AutoSeasonalFeature",
+     "PeriodicSinCosineFeature",
+     "FeatureTransformer",
+ ]
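These exports are plain callables over DataFrames (see the `FeatureGenerator` base class further down), so they can be chained directly. A minimal sketch on toy data with a `target` column (the data below is illustrative only):

```python
import numpy as np
import pandas as pd

from tabpfn_time_series.features import RunningIndexFeature, PeriodicSinCosineFeature

# Toy series: 200 points with a period-24 oscillation (illustrative only).
df = pd.DataFrame({"target": np.sin(2 * np.pi * np.arange(200) / 24)})

# Each generator returns a copy of the frame with extra feature columns.
for generator in (RunningIndexFeature(), PeriodicSinCosineFeature(periods=[24])):
    df = generator(df)

print(df.columns.tolist())  # ['target', 'running_index', 'sin_24', 'cos_24']
```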
@@ -0,0 +1,307 @@
+ import numpy as np
+ import pandas as pd
+ from typing import List, Optional, Tuple, Literal
+
+ import logging
+
+ from scipy import fft
+ from scipy.signal import find_peaks
+ from statsmodels.tsa.stattools import acf
+
+ from tabpfn_time_series.features.feature_generator_base import (
+     FeatureGenerator,
+ )
+ from tabpfn_time_series.features.basic_features import (
+     PeriodicSinCosineFeature,
+ )
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class AutoSeasonalFeature(FeatureGenerator):
+     class Config:
+         max_top_k: int = 5
+         do_detrend: bool = True
+         detrend_type: Literal["first_diff", "loess", "linear", "constant"] = "linear"
+         use_peaks_only: bool = True
+         apply_hann_window: bool = True
+         zero_padding_factor: int = 2
+         round_to_closest_integer: bool = True
+         validate_with_acf: bool = False
+         sampling_interval: float = 1.0
+         magnitude_threshold: Optional[float] = 0.05
+         relative_threshold: bool = True
+         exclude_zero: bool = True
+
+     def __init__(self, config: Optional[dict] = None):
+         # Create default config from Config class
+         default_config = {
+             k: v for k, v in vars(self.Config).items() if not k.startswith("__")
+         }
+
+         # Initialize config with defaults
+         self.config = default_config.copy()
+
+         # Update with user-provided config if any
+         if config is not None:
+             self.config.update(config)
+
+         # Validate config parameters
+         self._validate_config()
+
+         logger.debug(f"Initialized AutoSeasonalFeature with config: {self.config}")
+
+     def _validate_config(self):
+         """Validate configuration parameters"""
+         if self.config["max_top_k"] < 1:
+             logger.warning("max_top_k must be at least 1, setting to 1")
+             self.config["max_top_k"] = 1
+
+         if self.config["zero_padding_factor"] < 1:
+             logger.warning("zero_padding_factor must be at least 1, setting to 1")
+             self.config["zero_padding_factor"] = 1
+
+         if self.config["detrend_type"] not in [
+             "first_diff",
+             "loess",
+             "linear",
+             "constant",
+         ]:
+             logger.warning(
+                 f"Invalid detrend_type: {self.config['detrend_type']}, using 'linear'"
+             )
+             self.config["detrend_type"] = "linear"
+
+     def generate(self, df: pd.DataFrame) -> pd.DataFrame:
+         df = df.copy()
+
+         # Detect seasonal periods from target data
+         detected_periods_and_magnitudes = self.find_seasonal_periods(
+             df.target, **self.config
+         )
+         logger.debug(
+             f"Found {len(detected_periods_and_magnitudes)} seasonal periods: {detected_periods_and_magnitudes}"
+         )
+
+         # Extract just the periods (without magnitudes)
+         periods = [period for period, _ in detected_periods_and_magnitudes]
+
+         # Generate features for detected periods using PeriodicSinCosineFeature
+         if periods:
+             feature_generator = PeriodicSinCosineFeature(periods=periods)
+             df = feature_generator.generate(df)
+
+             # Standardize column names for consistency across time series
+             renamed_columns = {}
+             for i, period in enumerate(periods):
+                 renamed_columns[f"sin_{period}"] = f"sin_#{i}"
+                 renamed_columns[f"cos_{period}"] = f"cos_#{i}"
+
+             df = df.rename(columns=renamed_columns)
+
+         # Add placeholder zero columns for missing periods up to max_top_k
+         for i in range(len(periods), self.config["max_top_k"]):
+             df[f"sin_#{i}"] = 0.0
+             df[f"cos_#{i}"] = 0.0
+
+         return df
+
+     @staticmethod
+     def find_seasonal_periods(
+         target_values: pd.Series,
+         max_top_k: int = 10,
+         do_detrend: bool = True,
+         detrend_type: Literal[
+             "first_diff", "loess", "linear", "constant"
+         ] = "first_diff",
+         use_peaks_only: bool = True,
+         apply_hann_window: bool = True,
+         zero_padding_factor: int = 2,
+         round_to_closest_integer: bool = True,
+         validate_with_acf: bool = False,
+         sampling_interval: float = 1.0,
+         magnitude_threshold: Optional[
+             float
+         ] = 0.05,  # Default relative threshold (5% of max)
+         relative_threshold: bool = True,  # Interpret threshold as a fraction of max FFT magnitude
+         exclude_zero: bool = False,
+     ) -> List[Tuple[float, float]]:
+         """
+         Identify dominant seasonal periods in a time series using FFT.
+
+         Parameters:
+         - target_values: pd.Series
+             Input time series data.
+         - max_top_k: int
+             Maximum number of dominant periods to return.
+         - do_detrend: bool
+             If True, remove the linear trend from the signal.
+         - use_peaks_only: bool
+             If True, consider only local peaks in the FFT magnitude spectrum.
+         - apply_hann_window: bool
+             If True, apply a Hann window to reduce spectral leakage.
+         - zero_padding_factor: int
+             Factor by which to zero-pad the signal for finer frequency resolution.
+         - round_to_closest_integer: bool
+             If True, round the detected periods to the nearest integer.
+         - validate_with_acf: bool
+             If True, validate detected periods against the autocorrelation function.
+         - sampling_interval: float
+             Time interval between consecutive samples.
+         - magnitude_threshold: Optional[float]
+             Threshold to filter out less significant frequency components.
+             Default is 0.05, interpreted as 5% of the maximum FFT magnitude if relative_threshold is True.
+         - relative_threshold: bool
+             If True, the `magnitude_threshold` is interpreted as a fraction of the maximum FFT magnitude.
+             Otherwise, it is treated as an absolute threshold value.
+         - exclude_zero: bool
+             If True, exclude periods of 0 from the results.
+
+         Returns:
+         - List[Tuple[float, float]]:
+             A list of (period, magnitude) tuples, sorted in descending order by magnitude.
+         """
+         # Convert the Pandas Series to a NumPy array
+         values = np.array(target_values, dtype=float)
+
+         # Quick hack to ignore the test_X
+         # (Assuming train_X target is not NaN, and test_X target is NaN)
+         # Dropping all the NaN values
+         values = values[~np.isnan(values)]
+
+         N_original = len(values)
+
+         # Detrend the signal using a linear detrend method if requested
+         if do_detrend:
+             values = detrend(values, detrend_type)
+
+         # Apply a Hann window to reduce spectral leakage
+         if apply_hann_window:
+             window = np.hanning(N_original)
+             values = values * window
+
+         # Zero-pad the signal for improved frequency resolution
+         if zero_padding_factor > 1:
+             padded_length = int(N_original * zero_padding_factor)
+             padded_values = np.zeros(padded_length)
+             padded_values[:N_original] = values
+             values = padded_values
+             N = padded_length
+         else:
+             N = N_original
+
+         # Compute the FFT (using rfft) and obtain frequency bins
+         fft_values = fft.rfft(values)
+         fft_magnitudes = np.abs(fft_values)
+         freqs = np.fft.rfftfreq(N, d=sampling_interval)
+
+         # Exclude the DC component (0 Hz) to avoid bias from the signal's mean
+         fft_magnitudes[0] = 0.0
+
+         # Determine the threshold (absolute value)
+         if magnitude_threshold is not None and relative_threshold:
+             threshold_value = magnitude_threshold * np.max(fft_magnitudes)
+         else:
+             threshold_value = magnitude_threshold
+
+         # Identify dominant frequencies
+         if use_peaks_only:
+             if threshold_value is not None:
+                 peak_indices, _ = find_peaks(fft_magnitudes, height=threshold_value)
+             else:
+                 peak_indices, _ = find_peaks(fft_magnitudes)
+             if len(peak_indices) == 0:
+                 # Fallback to considering all frequency bins if no peaks are found
+                 peak_indices = np.arange(len(fft_magnitudes))
+             # Sort the peak indices by magnitude in descending order
+             sorted_peak_indices = peak_indices[
+                 np.argsort(fft_magnitudes[peak_indices])[::-1]
+             ]
+             top_indices = sorted_peak_indices[:max_top_k]
+         else:
+             sorted_indices = np.argsort(fft_magnitudes)[::-1]
+             if threshold_value is not None:
+                 sorted_indices = [
+                     i for i in sorted_indices if fft_magnitudes[i] >= threshold_value
+                 ]
+             top_indices = sorted_indices[:max_top_k]
+
+         # Convert frequencies to periods (avoiding division by zero)
+         periods = np.zeros_like(freqs)
+         non_zero = freqs > 0
+         periods[non_zero] = 1.0 / freqs[non_zero]
+         top_periods = periods[top_indices]
+
+         logger.debug(f"Top periods: {top_periods}")
+
+         # Optionally round the periods to the nearest integer
+         if round_to_closest_integer:
+             top_periods = np.round(top_periods)
+
+         # Filter out zero periods if requested
+         if exclude_zero:
+             non_zero_mask = top_periods != 0
+             top_periods = top_periods[non_zero_mask]
+             top_indices = top_indices[non_zero_mask]
+
+         # Keep unique periods only
+         if len(top_periods) > 0:
+             unique_period_indices = np.unique(top_periods, return_index=True)[1]
+             top_periods = top_periods[unique_period_indices]
+             top_indices = top_indices[unique_period_indices]
+
+         # Pair each period with its corresponding magnitude
+         results = [
+             (top_periods[i], fft_magnitudes[top_indices[i]])
+             for i in range(len(top_indices))
+         ]
+
+         # Validate with ACF if requested and filter the results accordingly
+         if validate_with_acf:
+             # Compute ACF on the original (non-padded) detrended signal
+             acf_values = acf(
+                 np.array(target_values, dtype=float)[:N_original],
+                 nlags=N_original,
+                 fft=True,
+             )
+             acf_peak_indices, _ = find_peaks(
+                 acf_values, height=1.96 / np.sqrt(N_original)
+             )
+             validated_results = []
+             for period, mag in results:
+                 period_int = int(round(period))
+                 if period_int < len(acf_values) and any(
+                     abs(period_int - peak) <= 1 for peak in acf_peak_indices
+                 ):
+                     validated_results.append((period, mag))
+             if validated_results:
+                 results = validated_results
+
+         # Ensure the final results are sorted in descending order by magnitude
+         results.sort(key=lambda x: x[1], reverse=True)
+
+         return results
+
+
+ def detrend(
+     x: np.ndarray, detrend_type: Literal["first_diff", "loess", "linear"]
+ ) -> np.ndarray:
+     if detrend_type == "first_diff":
+         return np.diff(x, prepend=x[0])
+
+     elif detrend_type == "loess":
+         from statsmodels.api import nonparametric
+
+         indices = np.arange(len(x))
+         lowess = nonparametric.lowess(x, indices, frac=0.1)
+         trend = lowess[:, 1]
+         return x - trend
+
+     elif detrend_type in ["linear", "constant"]:
+         from scipy.signal import detrend as scipy_detrend
+
+         return scipy_detrend(x, type=detrend_type)
+
+     else:
+         raise ValueError(f"Invalid detrend method: {detrend_type}")
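For reference, the FFT-based period detection can be exercised on its own; a minimal sketch on synthetic data (the series below is illustrative, not from the package tests):

```python
import numpy as np
import pandas as pd

from tabpfn_time_series.features import AutoSeasonalFeature

# Synthetic hourly-style signal with daily (24) and weekly (168) cycles plus noise.
rng = np.random.default_rng(0)
t = np.arange(24 * 7 * 8)
y = pd.Series(
    np.sin(2 * np.pi * t / 24)
    + np.sin(2 * np.pi * t / 168)
    + 0.1 * rng.normal(size=t.size)
)

# Returns (period, magnitude) tuples sorted by descending FFT magnitude.
periods = AutoSeasonalFeature.find_seasonal_periods(y, max_top_k=3, exclude_zero=True)
print(periods)  # e.g. dominant periods near 24 and 168
```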
@@ -0,0 +1,88 @@
+ import numpy as np
+ import pandas as pd
+ from typing import List, Dict, Optional
+
+ import gluonts.time_feature
+
+ from tabpfn_time_series.features.feature_generator_base import (
+     FeatureGenerator,
+ )
+
+
+ class RunningIndexFeature(FeatureGenerator):
+     def generate(self, df: pd.DataFrame) -> pd.DataFrame:
+         df = df.copy()
+         df["running_index"] = range(len(df))
+         return df
+
+
+ class CalendarFeature(FeatureGenerator):
+     def __init__(
+         self,
+         components: Optional[List[str]] = None,
+         seasonal_features: Optional[Dict[str, List[float]]] = None,
+     ):
+         self.components = components or ["year"]
+         self.seasonal_features = seasonal_features or {
+             # (feature, natural seasonality)
+             "second_of_minute": [60],
+             "minute_of_hour": [60],
+             "hour_of_day": [24],
+             "day_of_week": [7],
+             "day_of_month": [30.5],
+             "day_of_year": [365],
+             "week_of_year": [52],
+             "month_of_year": [12],
+         }
+
+     def generate(self, df: pd.DataFrame) -> pd.DataFrame:
+         df = df.copy()
+         timestamps = df.index.get_level_values("timestamp")
+
+         # Add basic calendar components
+         for component in self.components:
+             df[component] = getattr(timestamps, component)
+
+         # Add seasonal features
+         for feature_name, periods in self.seasonal_features.items():
+             feature_func = getattr(gluonts.time_feature, f"{feature_name}_index")
+             feature = feature_func(timestamps).astype(np.int32)
+
+             if periods is not None:
+                 for period in periods:
+                     period = period - 1  # Adjust for 0-based indexing
+                     df[f"{feature_name}_sin"] = np.sin(2 * np.pi * feature / period)
+                     df[f"{feature_name}_cos"] = np.cos(2 * np.pi * feature / period)
+             else:
+                 df[feature_name] = feature
+
+         return df
+
+
+ class AdditionalCalendarFeature(CalendarFeature):
+     def __init__(
+         self,
+         components: Optional[List[str]] = None,
+         additional_seasonal_features: Optional[Dict[str, List[float]]] = None,
+     ):
+         super().__init__(components=components)
+
+         self.seasonal_features = {
+             **additional_seasonal_features,
+             **self.seasonal_features,
+         }
+
+
+ class PeriodicSinCosineFeature(FeatureGenerator):
+     def __init__(self, periods: List[float], name_suffix: str = None):
+         self.periods = periods
+         self.name_suffix = name_suffix
+
+     def generate(self, df: pd.DataFrame) -> pd.DataFrame:
+         df = df.copy()
+         for i, period in enumerate(self.periods):
+             name_suffix = f"{self.name_suffix}_{i}" if self.name_suffix else f"{period}"
+             df[f"sin_{name_suffix}"] = np.sin(2 * np.pi * np.arange(len(df)) / period)
+             df[f"cos_{name_suffix}"] = np.cos(2 * np.pi * np.arange(len(df)) / period)
+
+         return df
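`CalendarFeature.generate` expects the frame's index to contain a `timestamp` level. A minimal sketch with a two-level index (the `item_id` level name is an assumption for illustration; only the `timestamp` level is actually required):

```python
import pandas as pd

from tabpfn_time_series.features import CalendarFeature

# Hourly toy frame indexed by (item_id, timestamp).
index = pd.MultiIndex.from_product(
    [["series_0"], pd.date_range("2024-01-01", periods=48, freq="h")],
    names=["item_id", "timestamp"],
)
df = pd.DataFrame({"target": range(48)}, index=index)

# Restrict to one seasonal feature for a compact example.
feature = CalendarFeature(components=["year"], seasonal_features={"hour_of_day": [24]})
out = feature.generate(df)
print([c for c in out.columns if c != "target"])
# ['year', 'hour_of_day_sin', 'hour_of_day_cos']
```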
@@ -0,0 +1,21 @@
+ from abc import ABC, abstractmethod
+
+ import pandas as pd
+
+
+ class FeatureGenerator(ABC):
+     """Abstract base class for feature generators"""
+
+     @abstractmethod
+     def generate(self, df: pd.DataFrame) -> pd.DataFrame:
+         """Generate features for the given dataframe"""
+         pass
+
+     def __call__(self, df: pd.DataFrame) -> pd.DataFrame:
+         return self.generate(df)
+
+     def __str__(self) -> str:
+         return f"{self.__class__.__name__}_{self.__dict__}"
+
+     def __repr__(self) -> str:
+         return self.__str__()
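Custom features only need to implement `generate`; the base class supplies `__call__` and the string representations. A minimal sketch of a user-defined generator (the lag feature below is a hypothetical example, not part of the package):

```python
import pandas as pd

from tabpfn_time_series.features.feature_generator_base import FeatureGenerator


class LagFeature(FeatureGenerator):
    """Hypothetical example: add a shifted copy of the target as a feature."""

    def __init__(self, lag: int = 1):
        self.lag = lag

    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
        df = df.copy()
        df[f"target_lag_{self.lag}"] = df["target"].shift(self.lag)
        return df


df = pd.DataFrame({"target": [1.0, 2.0, 3.0, 4.0]})
print(LagFeature(lag=2)(df))  # __call__ from the base class dispatches to generate()
```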