tabpfn-time-series 0.1.3__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/.github/workflows/pull_request.yml +2 -2
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/PKG-INFO +20 -18
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/README.md +12 -14
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/pyproject.toml +2 -2
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/requirements-dev.txt +2 -0
- tabpfn_time_series-1.0.0/requirements.txt +11 -0
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/tabpfn_time_series/__init__.py +1 -2
- tabpfn_time_series-1.0.0/tabpfn_time_series/features/__init__.py +17 -0
- tabpfn_time_series-1.0.0/tabpfn_time_series/features/auto_features.py +307 -0
- tabpfn_time_series-1.0.0/tabpfn_time_series/features/basic_features.py +88 -0
- tabpfn_time_series-1.0.0/tabpfn_time_series/features/feature_generator_base.py +21 -0
- tabpfn_time_series-1.0.0/tabpfn_time_series/features/feature_transformer.py +53 -0
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/tests/test_predictor.py +16 -8
- tabpfn_time_series-0.1.3/.cache/huggingface/.gitignore +0 -1
- tabpfn_time_series-0.1.3/.cache/huggingface/download/config.json.lock +0 -0
- tabpfn_time_series-0.1.3/.cache/huggingface/download/config.json.metadata +0 -3
- tabpfn_time_series-0.1.3/.cache/huggingface/download/tabpfn-v2-regressor-2noar4o2.ckpt.lock +0 -0
- tabpfn_time_series-0.1.3/.cache/huggingface/download/tabpfn-v2-regressor-2noar4o2.ckpt.metadata +0 -3
- tabpfn_time_series-0.1.3/local_notebooks/analysis.ipynb +0 -1193
- tabpfn_time_series-0.1.3/local_notebooks/figures/performance_by_dataset_MASE.pdf +0 -0
- tabpfn_time_series-0.1.3/local_notebooks/figures/performance_by_domain_MASE.pdf +0 -0
- tabpfn_time_series-0.1.3/local_notebooks/figures/performance_by_freq_MASE.pdf +0 -0
- tabpfn_time_series-0.1.3/local_notebooks/figures/performance_by_num_variates_MASE.pdf +0 -0
- tabpfn_time_series-0.1.3/local_notebooks/trend_column.ipynb +0 -1055
- tabpfn_time_series-0.1.3/local_scripts/aggregate_result.py +0 -112
- tabpfn_time_series-0.1.3/local_scripts/run_eval_on_slurm.py +0 -111
- tabpfn_time_series-0.1.3/playground.ipynb +0 -470
- tabpfn_time_series-0.1.3/requirements.txt +0 -7
- tabpfn_time_series-0.1.3/tabpfn-v2-regressor-2noar4o2.ckpt +0 -0
- tabpfn_time_series-0.1.3/tabpfn_time_series/feature.py +0 -78
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/.gitignore +0 -0
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/.pre-commit-config.yaml +0 -0
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/LICENSE.txt +0 -0
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/demo.ipynb +0 -0
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/ruff.toml +0 -0
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/tabpfn_time_series/data_preparation.py +0 -0
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/tabpfn_time_series/defaults.py +0 -0
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/tabpfn_time_series/plot.py +0 -0
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/tabpfn_time_series/predictor.py +0 -0
- {tabpfn_time_series-0.1.3 → tabpfn_time_series-1.0.0}/tabpfn_time_series/tabpfn_worker.py +0 -0
@@ -27,7 +27,7 @@ jobs:
|
|
27
27
|
fail-fast: false
|
28
28
|
matrix:
|
29
29
|
os: [ubuntu-latest, macos-latest, windows-latest]
|
30
|
-
python-version: ["3.
|
30
|
+
python-version: ["3.10", "3.11", "3.12"]
|
31
31
|
runs-on: ${{ matrix.os }}
|
32
32
|
steps:
|
33
33
|
- uses: actions/checkout@v4
|
@@ -56,4 +56,4 @@ jobs:
|
|
56
56
|
env:
|
57
57
|
TABPFN_CLIENT_API_KEY: ${{ secrets.TABPFN_CLIENT_API_KEY }}
|
58
58
|
run: |
|
59
|
-
python -m unittest discover -s tests -t tests
|
59
|
+
python -m unittest discover -s tests -t tests
|
@@ -1,7 +1,7 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: tabpfn_time_series
|
3
|
-
Version: 0.
|
4
|
-
Summary: Zero-shot time series forecasting with
|
3
|
+
Version: 1.0.0
|
4
|
+
Summary: Zero-shot time series forecasting with TabPFNv2
|
5
5
|
Project-URL: Homepage, https://github.com/liam-sbhoo/tabpfn-time-series
|
6
6
|
Project-URL: Bug Tracker, https://github.com/liam-sbhoo/tabpfn-time-series/issues
|
7
7
|
Author-email: Liam Shi Bin Hoo <hoos@tf.uni-freiburg.de>
|
@@ -14,32 +14,39 @@ Requires-Dist: autogluon-timeseries>=1.2
|
|
14
14
|
Requires-Dist: datasets>=3.3.2
|
15
15
|
Requires-Dist: gluonts>=0.16.0
|
16
16
|
Requires-Dist: pandas<2.2.0,>=2.1.2
|
17
|
-
Requires-Dist:
|
18
|
-
Requires-Dist:
|
17
|
+
Requires-Dist: python-dotenv>=1.1.0
|
18
|
+
Requires-Dist: pyyaml>=6.0.1
|
19
|
+
Requires-Dist: tabpfn-client>=0.1.7
|
20
|
+
Requires-Dist: tabpfn>=2.0.9
|
19
21
|
Requires-Dist: tqdm
|
20
22
|
Provides-Extra: dev
|
21
23
|
Requires-Dist: build; extra == 'dev'
|
22
24
|
Requires-Dist: jupyter; extra == 'dev'
|
23
25
|
Requires-Dist: pre-commit; extra == 'dev'
|
24
26
|
Requires-Dist: ruff; extra == 'dev'
|
27
|
+
Requires-Dist: submitit>=1.5.2; extra == 'dev'
|
25
28
|
Requires-Dist: twine; extra == 'dev'
|
29
|
+
Requires-Dist: wandb>=0.19.8; extra == 'dev'
|
26
30
|
Description-Content-Type: text/markdown
|
27
31
|
|
28
|
-
#
|
32
|
+
# TabPFN-TS
|
33
|
+
|
34
|
+
> Zero-Shot Time Series Forecasting with TabPFNv2
|
29
35
|
|
30
36
|
[](https://badge.fury.io/py/tabpfn-time-series)
|
31
37
|
[](https://colab.research.google.com/github/liam-sbhoo/tabpfn-time-series/blob/main/demo.ipynb)
|
32
38
|
[](https://discord.com/channels/1285598202732482621/)
|
33
|
-
[](https://arxiv.org/abs/2501.
|
39
|
+
[](https://arxiv.org/abs/2501.02945v3)
|
34
40
|
|
35
41
|
## 📌 News
|
42
|
+
- **27-05-2025**: 📝 New **[paper](https://arxiv.org/abs/2501.02945v3)** version and **v1.0.0** release! Strong [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) results, new AutoSeasonalFeatures, improved CalendarFeatures.
|
36
43
|
- **27-01-2025**: 🚀 Ranked _**1st**_ on [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark<sup>[1]</sup>!
|
37
|
-
- **10-10-2024**: 🚀 TabPFN-TS [paper](https://arxiv.org/abs/2501.
|
44
|
+
- **10-10-2024**: 🚀 TabPFN-TS [paper](https://arxiv.org/abs/2501.02945v2) accepted to NeurIPS 2024 [TRL](https://table-representation-learning.github.io/NeurIPS2024/) and [TSALM](https://neurips-time-series-workshop.github.io/) workshops!
|
38
45
|
|
39
46
|
_[1] Last checked on: 10/03/2025_
|
40
47
|
|
41
48
|
## ✨ Introduction
|
42
|
-
We demonstrate that the tabular foundation model **[
|
49
|
+
We demonstrate that the tabular foundation model **[TabPFNv2](https://github.com/PriorLabs/TabPFN)**, combined with lightweight feature engineering, enables zero-shot time series forecasting for both point and probabilistic tasks. On the **[GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval)** benchmark, our method achieves performance on par with top-tier models across both evaluation metrics.
|
43
50
|
|
44
51
|
## 📖 How does it work?
|
45
52
|
|
@@ -49,27 +56,22 @@ Our work proposes to frame **univariate time series forecasting** as a **tabular
|
|
49
56
|
|
50
57
|
Concretely, we:
|
51
58
|
1. Transform a time series into a table
|
52
|
-
2. Extract features
|
53
|
-
3. Perform regression on the table using
|
59
|
+
2. Extract features and add them to the table
|
60
|
+
3. Perform regression on the table using TabPFNv2
|
54
61
|
4. Use regression results as time series forecasting outputs
|
55
62
|
|
56
|
-
For more details, please refer to our [paper](https://arxiv.org/abs/2501.
|
63
|
+
For more details, please refer to our [paper](https://arxiv.org/abs/2501.02945v3).
|
64
|
+
<!-- and our [poster](docs/tabpfn-ts-neurips-poster.pdf) (presented at NeurIPS 2024 TRL and TSALM workshops). -->
|
57
65
|
|
58
66
|
## 👉 **Why give us a try?**
|
59
67
|
- **Zero-shot forecasting**: this method is extremely fast and requires no training, making it highly accessible for experimenting with your own problems.
|
60
68
|
- **Point and probabilistic forecasting**: it provides accurate point forecasts as well as probabilistic forecasts.
|
61
69
|
- **Support for exogenous variables**: if you have exogenous variables, this method can seamlessly incorporate them into the forecasting model.
|
62
70
|
|
63
|
-
On top of that, thanks to **[tabpfn-client](https://github.com/automl/tabpfn-client)** from **[Prior Labs](https://priorlabs.ai)**, you won’t even need your own GPU to run fast inference with
|
71
|
+
On top of that, thanks to **[tabpfn-client](https://github.com/automl/tabpfn-client)** from **[Prior Labs](https://priorlabs.ai)**, you won’t even need your own GPU to run fast inference with TabPFNv2. 😉 We have included `tabpfn-client` as the default engine in our implementation.
|
64
72
|
|
65
73
|
## How to use it?
|
66
74
|
|
67
75
|
[](https://colab.research.google.com/github/liam-sbhoo/tabpfn-time-series/blob/main/demo.ipynb)
|
68
76
|
|
69
77
|
The demo should explain it all. 😉
|
70
|
-
|
71
|
-
## 📊 GIFT-EVAL Benchmark
|
72
|
-
|
73
|
-
We have submitted our results to the [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark. Stay tuned for results!
|
74
|
-
|
75
|
-
For more details regarding the evaluation setup, please refer to [README.md](gift_eval/README.md).
|
@@ -1,18 +1,21 @@
|
|
1
|
-
#
|
1
|
+
# TabPFN-TS
|
2
|
+
|
3
|
+
> Zero-Shot Time Series Forecasting with TabPFNv2
|
2
4
|
|
3
5
|
[](https://badge.fury.io/py/tabpfn-time-series)
|
4
6
|
[](https://colab.research.google.com/github/liam-sbhoo/tabpfn-time-series/blob/main/demo.ipynb)
|
5
7
|
[](https://discord.com/channels/1285598202732482621/)
|
6
|
-
[](https://arxiv.org/abs/2501.
|
8
|
+
[](https://arxiv.org/abs/2501.02945v3)
|
7
9
|
|
8
10
|
## 📌 News
|
11
|
+
- **27-05-2025**: 📝 New **[paper](https://arxiv.org/abs/2501.02945v3)** version and **v1.0.0** release! Strong [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) results, new AutoSeasonalFeatures, improved CalendarFeatures.
|
9
12
|
- **27-01-2025**: 🚀 Ranked _**1st**_ on [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark<sup>[1]</sup>!
|
10
|
-
- **10-10-2024**: 🚀 TabPFN-TS [paper](https://arxiv.org/abs/2501.
|
13
|
+
- **10-10-2024**: 🚀 TabPFN-TS [paper](https://arxiv.org/abs/2501.02945v2) accepted to NeurIPS 2024 [TRL](https://table-representation-learning.github.io/NeurIPS2024/) and [TSALM](https://neurips-time-series-workshop.github.io/) workshops!
|
11
14
|
|
12
15
|
_[1] Last checked on: 10/03/2025_
|
13
16
|
|
14
17
|
## ✨ Introduction
|
15
|
-
We demonstrate that the tabular foundation model **[
|
18
|
+
We demonstrate that the tabular foundation model **[TabPFNv2](https://github.com/PriorLabs/TabPFN)**, combined with lightweight feature engineering, enables zero-shot time series forecasting for both point and probabilistic tasks. On the **[GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval)** benchmark, our method achieves performance on par with top-tier models across both evaluation metrics.
|
16
19
|
|
17
20
|
## 📖 How does it work?
|
18
21
|
|
@@ -22,27 +25,22 @@ Our work proposes to frame **univariate time series forecasting** as a **tabular
|
|
22
25
|
|
23
26
|
Concretely, we:
|
24
27
|
1. Transform a time series into a table
|
25
|
-
2. Extract features
|
26
|
-
3. Perform regression on the table using
|
28
|
+
2. Extract features and add them to the table
|
29
|
+
3. Perform regression on the table using TabPFNv2
|
27
30
|
4. Use regression results as time series forecasting outputs
|
28
31
|
|
29
|
-
For more details, please refer to our [paper](https://arxiv.org/abs/2501.
|
32
|
+
For more details, please refer to our [paper](https://arxiv.org/abs/2501.02945v3).
|
33
|
+
<!-- and our [poster](docs/tabpfn-ts-neurips-poster.pdf) (presented at NeurIPS 2024 TRL and TSALM workshops). -->
|
30
34
|
|
31
35
|
## 👉 **Why give us a try?**
|
32
36
|
- **Zero-shot forecasting**: this method is extremely fast and requires no training, making it highly accessible for experimenting with your own problems.
|
33
37
|
- **Point and probabilistic forecasting**: it provides accurate point forecasts as well as probabilistic forecasts.
|
34
38
|
- **Support for exogenous variables**: if you have exogenous variables, this method can seamlessly incorporate them into the forecasting model.
|
35
39
|
|
36
|
-
On top of that, thanks to **[tabpfn-client](https://github.com/automl/tabpfn-client)** from **[Prior Labs](https://priorlabs.ai)**, you won’t even need your own GPU to run fast inference with
|
40
|
+
On top of that, thanks to **[tabpfn-client](https://github.com/automl/tabpfn-client)** from **[Prior Labs](https://priorlabs.ai)**, you won’t even need your own GPU to run fast inference with TabPFNv2. 😉 We have included `tabpfn-client` as the default engine in our implementation.
|
37
41
|
|
38
42
|
## How to use it?
|
39
43
|
|
40
44
|
[](https://colab.research.google.com/github/liam-sbhoo/tabpfn-time-series/blob/main/demo.ipynb)
|
41
45
|
|
42
46
|
The demo should explain it all. 😉
|
43
|
-
|
44
|
-
## 📊 GIFT-EVAL Benchmark
|
45
|
-
|
46
|
-
We have submitted our results to the [GIFT-EVAL](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark. Stay tuned for results!
|
47
|
-
|
48
|
-
For more details regarding the evaluation setup, please refer to [README.md](gift_eval/README.md).
|
@@ -4,11 +4,11 @@ build-backend = "hatchling.build"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "tabpfn_time_series"
|
7
|
-
version = "0.
|
7
|
+
version = "1.0.0"
|
8
8
|
authors = [
|
9
9
|
{ name="Liam Shi Bin Hoo", email="hoos@tf.uni-freiburg.de" },
|
10
10
|
]
|
11
|
-
description = "Zero-shot time series forecasting with
|
11
|
+
description = "Zero-shot time series forecasting with TabPFNv2"
|
12
12
|
readme = "README.md"
|
13
13
|
requires-python = ">=3.10"
|
14
14
|
classifiers = [
|
@@ -1,11 +1,10 @@
|
|
1
|
-
from .
|
1
|
+
from .features import FeatureTransformer
|
2
2
|
from .predictor import TabPFNTimeSeriesPredictor, TabPFNMode
|
3
3
|
from .defaults import TABPFN_TS_DEFAULT_QUANTILE_CONFIG
|
4
4
|
|
5
5
|
__version__ = "0.1.0"
|
6
6
|
|
7
7
|
__all__ = [
|
8
|
-
"DefaultFeatures",
|
9
8
|
"FeatureTransformer",
|
10
9
|
"TabPFNTimeSeriesPredictor",
|
11
10
|
"TabPFNMode",
|
@@ -0,0 +1,17 @@
|
|
1
|
+
from .basic_features import (
|
2
|
+
RunningIndexFeature,
|
3
|
+
CalendarFeature,
|
4
|
+
AdditionalCalendarFeature,
|
5
|
+
PeriodicSinCosineFeature,
|
6
|
+
)
|
7
|
+
from .auto_features import AutoSeasonalFeature
|
8
|
+
from .feature_transformer import FeatureTransformer
|
9
|
+
|
10
|
+
__all__ = [
|
11
|
+
"RunningIndexFeature",
|
12
|
+
"CalendarFeature",
|
13
|
+
"AdditionalCalendarFeature",
|
14
|
+
"AutoSeasonalFeature",
|
15
|
+
"PeriodicSinCosineFeature",
|
16
|
+
"FeatureTransformer",
|
17
|
+
]
|
@@ -0,0 +1,307 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import pandas as pd
|
3
|
+
from typing import List, Optional, Tuple, Literal
|
4
|
+
|
5
|
+
import logging
|
6
|
+
|
7
|
+
from scipy import fft
|
8
|
+
from scipy.signal import find_peaks
|
9
|
+
from statsmodels.tsa.stattools import acf
|
10
|
+
|
11
|
+
from tabpfn_time_series.features.feature_generator_base import (
|
12
|
+
FeatureGenerator,
|
13
|
+
)
|
14
|
+
from tabpfn_time_series.features.basic_features import (
|
15
|
+
PeriodicSinCosineFeature,
|
16
|
+
)
|
17
|
+
|
18
|
+
|
19
|
+
logger = logging.getLogger(__name__)
|
20
|
+
|
21
|
+
|
22
|
+
class AutoSeasonalFeature(FeatureGenerator):
|
23
|
+
class Config:
|
24
|
+
max_top_k: int = 5
|
25
|
+
do_detrend: bool = True
|
26
|
+
detrend_type: Literal["first_diff", "loess", "linear", "constant"] = "linear"
|
27
|
+
use_peaks_only: bool = True
|
28
|
+
apply_hann_window: bool = True
|
29
|
+
zero_padding_factor: int = 2
|
30
|
+
round_to_closest_integer: bool = True
|
31
|
+
validate_with_acf: bool = False
|
32
|
+
sampling_interval: float = 1.0
|
33
|
+
magnitude_threshold: Optional[float] = 0.05
|
34
|
+
relative_threshold: bool = True
|
35
|
+
exclude_zero: bool = True
|
36
|
+
|
37
|
+
def __init__(self, config: Optional[dict] = None):
|
38
|
+
# Create default config from Config class
|
39
|
+
default_config = {
|
40
|
+
k: v for k, v in vars(self.Config).items() if not k.startswith("__")
|
41
|
+
}
|
42
|
+
|
43
|
+
# Initialize config with defaults
|
44
|
+
self.config = default_config.copy()
|
45
|
+
|
46
|
+
# Update with user-provided config if any
|
47
|
+
if config is not None:
|
48
|
+
self.config.update(config)
|
49
|
+
|
50
|
+
# Validate config parameters
|
51
|
+
self._validate_config()
|
52
|
+
|
53
|
+
logger.debug(f"Initialized AutoSeasonalFeature with config: {self.config}")
|
54
|
+
|
55
|
+
def _validate_config(self):
|
56
|
+
"""Validate configuration parameters"""
|
57
|
+
if self.config["max_top_k"] < 1:
|
58
|
+
logger.warning("max_top_k must be at least 1, setting to 1")
|
59
|
+
self.config["max_top_k"] = 1
|
60
|
+
|
61
|
+
if self.config["zero_padding_factor"] < 1:
|
62
|
+
logger.warning("zero_padding_factor must be at least 1, setting to 1")
|
63
|
+
self.config["zero_padding_factor"] = 1
|
64
|
+
|
65
|
+
if self.config["detrend_type"] not in [
|
66
|
+
"first_diff",
|
67
|
+
"loess",
|
68
|
+
"linear",
|
69
|
+
"constant",
|
70
|
+
]:
|
71
|
+
logger.warning(
|
72
|
+
f"Invalid detrend_type: {self.config['detrend_type']}, using 'linear'"
|
73
|
+
)
|
74
|
+
self.config["detrend_type"] = "linear"
|
75
|
+
|
76
|
+
def generate(self, df: pd.DataFrame) -> pd.DataFrame:
|
77
|
+
df = df.copy()
|
78
|
+
|
79
|
+
# Detect seasonal periods from target data
|
80
|
+
detected_periods_and_magnitudes = self.find_seasonal_periods(
|
81
|
+
df.target, **self.config
|
82
|
+
)
|
83
|
+
logger.debug(
|
84
|
+
f"Found {len(detected_periods_and_magnitudes)} seasonal periods: {detected_periods_and_magnitudes}"
|
85
|
+
)
|
86
|
+
|
87
|
+
# Extract just the periods (without magnitudes)
|
88
|
+
periods = [period for period, _ in detected_periods_and_magnitudes]
|
89
|
+
|
90
|
+
# Generate features for detected periods using PeriodicSinCosineFeature
|
91
|
+
if periods:
|
92
|
+
feature_generator = PeriodicSinCosineFeature(periods=periods)
|
93
|
+
df = feature_generator.generate(df)
|
94
|
+
|
95
|
+
# Standardize column names for consistency across time series
|
96
|
+
renamed_columns = {}
|
97
|
+
for i, period in enumerate(periods):
|
98
|
+
renamed_columns[f"sin_{period}"] = f"sin_#{i}"
|
99
|
+
renamed_columns[f"cos_{period}"] = f"cos_#{i}"
|
100
|
+
|
101
|
+
df = df.rename(columns=renamed_columns)
|
102
|
+
|
103
|
+
# Add placeholder zero columns for missing periods up to max_top_k
|
104
|
+
for i in range(len(periods), self.config["max_top_k"]):
|
105
|
+
df[f"sin_#{i}"] = 0.0
|
106
|
+
df[f"cos_#{i}"] = 0.0
|
107
|
+
|
108
|
+
return df
|
109
|
+
|
110
|
+
@staticmethod
|
111
|
+
def find_seasonal_periods(
|
112
|
+
target_values: pd.Series,
|
113
|
+
max_top_k: int = 10,
|
114
|
+
do_detrend: bool = True,
|
115
|
+
detrend_type: Literal[
|
116
|
+
"first_diff", "loess", "linear", "constant"
|
117
|
+
] = "first_diff",
|
118
|
+
use_peaks_only: bool = True,
|
119
|
+
apply_hann_window: bool = True,
|
120
|
+
zero_padding_factor: int = 2,
|
121
|
+
round_to_closest_integer: bool = True,
|
122
|
+
validate_with_acf: bool = False,
|
123
|
+
sampling_interval: float = 1.0,
|
124
|
+
magnitude_threshold: Optional[
|
125
|
+
float
|
126
|
+
] = 0.05, # Default relative threshold (5% of max)
|
127
|
+
relative_threshold: bool = True, # Interpret threshold as a fraction of max FFT magnitude
|
128
|
+
exclude_zero: bool = False,
|
129
|
+
) -> List[Tuple[float, float]]:
|
130
|
+
"""
|
131
|
+
Identify dominant seasonal periods in a time series using FFT.
|
132
|
+
|
133
|
+
Parameters:
|
134
|
+
- target_values: pd.Series
|
135
|
+
Input time series data.
|
136
|
+
- max_top_k: int
|
137
|
+
Maximum number of dominant periods to return.
|
138
|
+
- do_detrend: bool
|
139
|
+
If True, remove the linear trend from the signal.
|
140
|
+
- use_peaks_only: bool
|
141
|
+
If True, consider only local peaks in the FFT magnitude spectrum.
|
142
|
+
- apply_hann_window: bool
|
143
|
+
If True, apply a Hann window to reduce spectral leakage.
|
144
|
+
- zero_padding_factor: int
|
145
|
+
Factor by which to zero-pad the signal for finer frequency resolution.
|
146
|
+
- round_to_closest_integer: bool
|
147
|
+
If True, round the detected periods to the nearest integer.
|
148
|
+
- validate_with_acf: bool
|
149
|
+
If True, validate detected periods against the autocorrelation function.
|
150
|
+
- sampling_interval: float
|
151
|
+
Time interval between consecutive samples.
|
152
|
+
- magnitude_threshold: Optional[float]
|
153
|
+
Threshold to filter out less significant frequency components.
|
154
|
+
Default is 0.05, interpreted as 5% of the maximum FFT magnitude if relative_threshold is True.
|
155
|
+
- relative_threshold: bool
|
156
|
+
If True, the `magnitude_threshold` is interpreted as a fraction of the maximum FFT magnitude.
|
157
|
+
Otherwise, it is treated as an absolute threshold value.
|
158
|
+
- exclude_zero: bool
|
159
|
+
If True, exclude periods of 0 from the results.
|
160
|
+
|
161
|
+
Returns:
|
162
|
+
- List[Tuple[float, float]]:
|
163
|
+
A list of (period, magnitude) tuples, sorted in descending order by magnitude.
|
164
|
+
"""
|
165
|
+
# Convert the Pandas Series to a NumPy array
|
166
|
+
values = np.array(target_values, dtype=float)
|
167
|
+
|
168
|
+
# Quick hack to ignore the test_X
|
169
|
+
# (Assuming train_X target is not NaN, and test_X target is NaN)
|
170
|
+
# Dropping all the NaN values
|
171
|
+
values = values[~np.isnan(values)]
|
172
|
+
|
173
|
+
N_original = len(values)
|
174
|
+
|
175
|
+
# Detrend the signal using a linear detrend method if requested
|
176
|
+
if do_detrend:
|
177
|
+
values = detrend(values, detrend_type)
|
178
|
+
|
179
|
+
# Apply a Hann window to reduce spectral leakage
|
180
|
+
if apply_hann_window:
|
181
|
+
window = np.hanning(N_original)
|
182
|
+
values = values * window
|
183
|
+
|
184
|
+
# Zero-pad the signal for improved frequency resolution
|
185
|
+
if zero_padding_factor > 1:
|
186
|
+
padded_length = int(N_original * zero_padding_factor)
|
187
|
+
padded_values = np.zeros(padded_length)
|
188
|
+
padded_values[:N_original] = values
|
189
|
+
values = padded_values
|
190
|
+
N = padded_length
|
191
|
+
else:
|
192
|
+
N = N_original
|
193
|
+
|
194
|
+
# Compute the FFT (using rfft) and obtain frequency bins
|
195
|
+
fft_values = fft.rfft(values)
|
196
|
+
fft_magnitudes = np.abs(fft_values)
|
197
|
+
freqs = np.fft.rfftfreq(N, d=sampling_interval)
|
198
|
+
|
199
|
+
# Exclude the DC component (0 Hz) to avoid bias from the signal's mean
|
200
|
+
fft_magnitudes[0] = 0.0
|
201
|
+
|
202
|
+
# Determine the threshold (absolute value)
|
203
|
+
if magnitude_threshold is not None and relative_threshold:
|
204
|
+
threshold_value = magnitude_threshold * np.max(fft_magnitudes)
|
205
|
+
else:
|
206
|
+
threshold_value = magnitude_threshold
|
207
|
+
|
208
|
+
# Identify dominant frequencies
|
209
|
+
if use_peaks_only:
|
210
|
+
if threshold_value is not None:
|
211
|
+
peak_indices, _ = find_peaks(fft_magnitudes, height=threshold_value)
|
212
|
+
else:
|
213
|
+
peak_indices, _ = find_peaks(fft_magnitudes)
|
214
|
+
if len(peak_indices) == 0:
|
215
|
+
# Fallback to considering all frequency bins if no peaks are found
|
216
|
+
peak_indices = np.arange(len(fft_magnitudes))
|
217
|
+
# Sort the peak indices by magnitude in descending order
|
218
|
+
sorted_peak_indices = peak_indices[
|
219
|
+
np.argsort(fft_magnitudes[peak_indices])[::-1]
|
220
|
+
]
|
221
|
+
top_indices = sorted_peak_indices[:max_top_k]
|
222
|
+
else:
|
223
|
+
sorted_indices = np.argsort(fft_magnitudes)[::-1]
|
224
|
+
if threshold_value is not None:
|
225
|
+
sorted_indices = [
|
226
|
+
i for i in sorted_indices if fft_magnitudes[i] >= threshold_value
|
227
|
+
]
|
228
|
+
top_indices = sorted_indices[:max_top_k]
|
229
|
+
|
230
|
+
# Convert frequencies to periods (avoiding division by zero)
|
231
|
+
periods = np.zeros_like(freqs)
|
232
|
+
non_zero = freqs > 0
|
233
|
+
periods[non_zero] = 1.0 / freqs[non_zero]
|
234
|
+
top_periods = periods[top_indices]
|
235
|
+
|
236
|
+
logger.debug(f"Top periods: {top_periods}")
|
237
|
+
|
238
|
+
# Optionally round the periods to the nearest integer
|
239
|
+
if round_to_closest_integer:
|
240
|
+
top_periods = np.round(top_periods)
|
241
|
+
|
242
|
+
# Filter out zero periods if requested
|
243
|
+
if exclude_zero:
|
244
|
+
non_zero_mask = top_periods != 0
|
245
|
+
top_periods = top_periods[non_zero_mask]
|
246
|
+
top_indices = top_indices[non_zero_mask]
|
247
|
+
|
248
|
+
# Keep unique periods only
|
249
|
+
if len(top_periods) > 0:
|
250
|
+
unique_period_indices = np.unique(top_periods, return_index=True)[1]
|
251
|
+
top_periods = top_periods[unique_period_indices]
|
252
|
+
top_indices = top_indices[unique_period_indices]
|
253
|
+
|
254
|
+
# Pair each period with its corresponding magnitude
|
255
|
+
results = [
|
256
|
+
(top_periods[i], fft_magnitudes[top_indices[i]])
|
257
|
+
for i in range(len(top_indices))
|
258
|
+
]
|
259
|
+
|
260
|
+
# Validate with ACF if requested and filter the results accordingly
|
261
|
+
if validate_with_acf:
|
262
|
+
# Compute ACF on the original (non-padded) detrended signal
|
263
|
+
acf_values = acf(
|
264
|
+
np.array(target_values, dtype=float)[:N_original],
|
265
|
+
nlags=N_original,
|
266
|
+
fft=True,
|
267
|
+
)
|
268
|
+
acf_peak_indices, _ = find_peaks(
|
269
|
+
acf_values, height=1.96 / np.sqrt(N_original)
|
270
|
+
)
|
271
|
+
validated_results = []
|
272
|
+
for period, mag in results:
|
273
|
+
period_int = int(round(period))
|
274
|
+
if period_int < len(acf_values) and any(
|
275
|
+
abs(period_int - peak) <= 1 for peak in acf_peak_indices
|
276
|
+
):
|
277
|
+
validated_results.append((period, mag))
|
278
|
+
if validated_results:
|
279
|
+
results = validated_results
|
280
|
+
|
281
|
+
# Ensure the final results are sorted in descending order by magnitude
|
282
|
+
results.sort(key=lambda x: x[1], reverse=True)
|
283
|
+
|
284
|
+
return results
|
285
|
+
|
286
|
+
|
287
|
+
def detrend(
    x: np.ndarray,
    detrend_type: Literal["first_diff", "loess", "linear", "constant"],
) -> np.ndarray:
    """Remove the trend from a 1-D signal.

    Parameters:
    - x: np.ndarray
        Input signal.
    - detrend_type: Literal["first_diff", "loess", "linear", "constant"]
        "first_diff": first differences, with x[0] prepended so the output
        keeps the input length (the first element is therefore 0).
        "loess": subtract a LOWESS fit (frac=0.1) computed with statsmodels.
        "linear" / "constant": delegate to scipy.signal.detrend.

    Returns:
    - np.ndarray: the detrended signal, same length as `x`.

    Raises:
    - ValueError: if `detrend_type` is not one of the supported methods.
    """
    if detrend_type not in ("first_diff", "loess", "linear", "constant"):
        raise ValueError(f"Invalid detrend method: {detrend_type}")

    # An empty signal has no trend; x[0] below would raise IndexError.
    if len(x) == 0:
        return np.array(x, dtype=float)

    if detrend_type == "first_diff":
        return np.diff(x, prepend=x[0])

    if detrend_type == "loess":
        # Imported lazily so statsmodels is only needed for this method.
        from statsmodels.api import nonparametric

        indices = np.arange(len(x))
        lowess = nonparametric.lowess(x, indices, frac=0.1)
        trend = lowess[:, 1]
        return x - trend

    # "linear" or "constant"
    from scipy.signal import detrend as scipy_detrend

    return scipy_detrend(x, type=detrend_type)
|
@@ -0,0 +1,88 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import pandas as pd
|
3
|
+
from typing import List, Dict, Optional
|
4
|
+
|
5
|
+
import gluonts.time_feature
|
6
|
+
|
7
|
+
from tabpfn_time_series.features.feature_generator_base import (
|
8
|
+
FeatureGenerator,
|
9
|
+
)
|
10
|
+
|
11
|
+
|
12
|
+
class RunningIndexFeature(FeatureGenerator):
    """Add a ``running_index`` column holding each row's 0-based position."""

    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with the ``running_index`` column added."""
        indexed = df.copy()
        indexed["running_index"] = range(len(indexed))
        return indexed
|
17
|
+
|
18
|
+
|
19
|
+
class CalendarFeature(FeatureGenerator):
    """Add calendar components and sin/cos-encoded seasonal calendar features.

    Features are derived from the ``timestamp`` level of the frame's index.
    """

    def __init__(
        self,
        components: Optional[List[str]] = None,
        seasonal_features: Optional[Dict[str, List[float]]] = None,
    ):
        """Store the calendar components and seasonal features to generate.

        Parameters:
        - components: attribute names read from the timestamp index
          (falls back to ``["year"]`` when None or empty).
        - seasonal_features: maps a gluonts time-feature name (without the
          ``_index`` suffix) to a list of periods, or to None to store the raw
          integer feature. Falls back to the default table when None or empty.
        """
        if not components:
            components = ["year"]
        self.components = components

        if not seasonal_features:
            seasonal_features = {
                # (feature, natural seasonality)
                "second_of_minute": [60],
                "minute_of_hour": [60],
                "hour_of_day": [24],
                "day_of_week": [7],
                "day_of_month": [30.5],
                "day_of_year": [365],
                "week_of_year": [52],
                "month_of_year": [12],
            }
        self.seasonal_features = seasonal_features

    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with the configured calendar columns added."""
        out = df.copy()
        stamps = out.index.get_level_values("timestamp")

        # Add basic calendar components
        for name in self.components:
            out[name] = getattr(stamps, name)

        # Add seasonal features
        for feature_name, periods in self.seasonal_features.items():
            index_fn = getattr(gluonts.time_feature, f"{feature_name}_index")
            raw = index_fn(stamps).astype(np.int32)

            if periods is None:
                # No period given: keep the integer feature as-is
                out[feature_name] = raw
                continue

            # NOTE(review): column names do not include the period, so with
            # several periods each iteration overwrites the previous one and
            # only the last period's encoding survives.
            for period in periods:
                # Adjust for 0-based indexing
                phase = 2 * np.pi * raw / (period - 1)
                out[f"{feature_name}_sin"] = np.sin(phase)
                out[f"{feature_name}_cos"] = np.cos(phase)

        return out
|
60
|
+
|
61
|
+
|
62
|
+
class AdditionalCalendarFeature(CalendarFeature):
    """CalendarFeature with extra seasonal features on top of the defaults."""

    def __init__(
        self,
        components: Optional[List[str]] = None,
        additional_seasonal_features: Optional[Dict[str, List[float]]] = None,
    ):
        """Merge ``additional_seasonal_features`` into the default seasonal table.

        Parameters:
        - components: forwarded unchanged to ``CalendarFeature``.
        - additional_seasonal_features: extra ``{feature_name: periods}``
          entries. May be None (the declared default), in which case only the
          defaults are used.
        """
        super().__init__(components=components)

        # Unpacking None with ** raises TypeError, so treat it as "no extras".
        extra_features = additional_seasonal_features or {}

        # The defaults are unpacked last, so on a key collision the default
        # entry takes precedence over the additional one.
        self.seasonal_features = {
            **extra_features,
            **self.seasonal_features,
        }
|
74
|
+
|
75
|
+
|
76
|
+
class PeriodicSinCosineFeature(FeatureGenerator):
    """Add sin/cos columns of the row position for each given period."""

    def __init__(self, periods: List[float], name_suffix: str = None):
        """Store the periods and the optional column-name suffix.

        Parameters:
        - periods: periods (in rows) to encode.
        - name_suffix: if set, columns are named ``sin_<suffix>_<i>`` /
          ``cos_<suffix>_<i>`` with ``i`` the position in ``periods``;
          otherwise they are named after the period value itself.
        """
        self.periods = periods
        self.name_suffix = name_suffix

    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with one sin and one cos column per period."""
        out = df.copy()
        # Adding columns never changes the row count, so the positions can be
        # computed once for all periods.
        positions = np.arange(len(out))

        for i, period in enumerate(self.periods):
            if self.name_suffix:
                tag = f"{self.name_suffix}_{i}"
            else:
                tag = f"{period}"
            phase = 2 * np.pi * positions / period
            out[f"sin_{tag}"] = np.sin(phase)
            out[f"cos_{tag}"] = np.cos(phase)

        return out
|
@@ -0,0 +1,21 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
|
5
|
+
|
6
|
+
class FeatureGenerator(ABC):
    """Abstract base class for feature generators.

    Subclasses implement ``generate``; instances are also callable, which
    simply forwards to ``generate``.
    """

    @abstractmethod
    def generate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Generate features for the given dataframe"""

    def __call__(self, df: pd.DataFrame) -> pd.DataFrame:
        # Calling the instance is shorthand for generate()
        return self.generate(df)

    def __str__(self) -> str:
        # Class name followed by the instance attribute dict
        return f"{type(self).__name__}_{vars(self)}"

    def __repr__(self) -> str:
        return str(self)
|