tsbootstrap 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tsbootstrap-0.1.0/src/tsbootstrap.egg-info → tsbootstrap-0.1.2}/PKG-INFO +189 -150
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/README.md +170 -143
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/pyproject.toml +38 -47
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/base_bootstrap.py +166 -49
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/base_bootstrap_configs.py +4 -4
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/block_bootstrap.py +162 -86
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/block_bootstrap_configs.py +15 -14
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/block_generator.py +43 -31
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/block_length_sampler.py +2 -2
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/block_resampler.py +140 -94
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/bootstrap.py +67 -87
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/markov_sampler.py +40 -36
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/ranklags.py +7 -1
- tsbootstrap-0.1.2/src/tsbootstrap/registry/tests/test_tags.py +46 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/test_all_bootstraps.py +41 -4
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/time_series_model.py +17 -17
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/time_series_simulator.py +12 -12
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tsfit.py +4 -2
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/utils/odds_and_ends.py +6 -3
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/utils/types.py +19 -6
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/utils/validate.py +18 -16
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2/src/tsbootstrap.egg-info}/PKG-INFO +189 -150
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap.egg-info/requires.txt +21 -7
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_base_bootstrap_configs.py +0 -1
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_block_bootstrap.py +1 -2
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_block_generator.py +3 -3
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_block_resampler.py +1 -16
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_time_series_model.py +0 -1
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_time_series_simulator.py +35 -6
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_tsfit.py +0 -3
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_validate.py +2 -1
- tsbootstrap-0.1.0/src/tsbootstrap/registry/tests/test_tags.py +0 -24
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/LICENSE +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/setup.cfg +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/__init__.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/py.typed +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/registry/__init__.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/registry/_lookup.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/registry/_tags.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/registry/tests/__init__.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/__init__.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/scenarios/__init__.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/scenarios/scenarios.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/scenarios/scenarios_bootstrap.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/scenarios/scenarios_getter.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/test_all_estimators.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/test_class_register.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/test_switch.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/utils/__init__.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/utils/dependencies.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/utils/estimator_checks.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap.egg-info/SOURCES.txt +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap.egg-info/dependency_links.txt +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap.egg-info/top_level.txt +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_block_bootstrap_configs.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_block_length_sampler.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_bootstrap.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_markov_sampler.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_odds_and_ends.py +0 -0
- {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_ranklags.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: tsbootstrap
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: A Python package to generate bootstrapped time series
|
|
5
5
|
Author-email: Sankalp Gilda <sankalp.gilda@gmail.com>
|
|
6
6
|
Maintainer-email: Sankalp Gilda <sankalp.gilda@gmail.com>, Franz Kiraly <franz.kiraly@sktime.net>, Benedikt Heidrich <benedikt.heidrich@sktime.net>
|
|
@@ -43,16 +43,17 @@ Requires-Python: <3.13,>=3.8
|
|
|
43
43
|
Description-Content-Type: text/markdown
|
|
44
44
|
License-File: LICENSE
|
|
45
45
|
Requires-Dist: numpy<1.27,>=1.21
|
|
46
|
-
Requires-Dist: scikit-base
|
|
47
|
-
Requires-Dist: scikit-learn
|
|
46
|
+
Requires-Dist: scikit-base<=0.9.0,>=0.8.0
|
|
47
|
+
Requires-Dist: scikit-learn<=1.5.1,>=0.24
|
|
48
48
|
Requires-Dist: scipy<2.0.0,>=1.2
|
|
49
49
|
Requires-Dist: packaging
|
|
50
50
|
Provides-Extra: all-extras
|
|
51
|
-
Requires-Dist: arch
|
|
51
|
+
Requires-Dist: arch<=7.0.0,>=5.0.0; extra == "all-extras"
|
|
52
52
|
Requires-Dist: hmmlearn<0.3.2,>=0.3.0; extra == "all-extras"
|
|
53
53
|
Requires-Dist: pyclustering<0.11.0,>=0.10.0; extra == "all-extras"
|
|
54
54
|
Requires-Dist: scikit_learn_extra<0.4.0,>=0.3.0; extra == "all-extras"
|
|
55
55
|
Requires-Dist: statsmodels<0.15.0,>=0.12.1; extra == "all-extras"
|
|
56
|
+
Requires-Dist: dtaidistance; python_version < "3.10" and extra == "all-extras"
|
|
56
57
|
Provides-Extra: docs
|
|
57
58
|
Requires-Dist: furo; extra == "docs"
|
|
58
59
|
Requires-Dist: jupyter; extra == "docs"
|
|
@@ -62,19 +63,30 @@ Requires-Dist: numpydoc; extra == "docs"
|
|
|
62
63
|
Requires-Dist: pydata-sphinx-theme; extra == "docs"
|
|
63
64
|
Requires-Dist: Sphinx!=7.2.0,<8.0.0; extra == "docs"
|
|
64
65
|
Requires-Dist: sphinx-rtd-theme>=1.3.0; extra == "docs"
|
|
65
|
-
Requires-Dist: sphinx-copybutton; extra == "docs"
|
|
66
|
+
Requires-Dist: sphinx-copybutton>=0.5.2; extra == "docs"
|
|
66
67
|
Requires-Dist: sphinx-design<0.6.0; extra == "docs"
|
|
67
68
|
Requires-Dist: sphinx-gallery<0.15.0; extra == "docs"
|
|
68
69
|
Requires-Dist: sphinx-issues<4.0.0; extra == "docs"
|
|
69
70
|
Requires-Dist: sphinx-version-warning; extra == "docs"
|
|
70
|
-
Requires-Dist: tabulate; extra == "docs"
|
|
71
|
+
Requires-Dist: tabulate>=0.9.0; extra == "docs"
|
|
71
72
|
Provides-Extra: dev
|
|
72
|
-
Requires-Dist: black; extra == "dev"
|
|
73
|
+
Requires-Dist: black>=24.3.0; extra == "dev"
|
|
73
74
|
Requires-Dist: blacken-docs; extra == "dev"
|
|
74
75
|
Requires-Dist: hypothesis; extra == "dev"
|
|
75
76
|
Requires-Dist: pre-commit; extra == "dev"
|
|
76
77
|
Requires-Dist: pytest; extra == "dev"
|
|
77
78
|
Requires-Dist: pytest-cov; extra == "dev"
|
|
79
|
+
Requires-Dist: github-actions; extra == "dev"
|
|
80
|
+
Requires-Dist: importlib-metadata; extra == "dev"
|
|
81
|
+
Requires-Dist: pip-tools; extra == "dev"
|
|
82
|
+
Requires-Dist: pyright; extra == "dev"
|
|
83
|
+
Requires-Dist: ruff; extra == "dev"
|
|
84
|
+
Requires-Dist: autoflake; extra == "dev"
|
|
85
|
+
Requires-Dist: typos; extra == "dev"
|
|
86
|
+
Requires-Dist: tox; extra == "dev"
|
|
87
|
+
Requires-Dist: tox-gh-actions; extra == "dev"
|
|
88
|
+
Requires-Dist: pycobertura; extra == "dev"
|
|
89
|
+
Requires-Dist: tomlkit; extra == "dev"
|
|
78
90
|
|
|
79
91
|
<!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
|
|
80
92
|
[](#contributors)
|
|
@@ -107,7 +119,7 @@ Requires-Dist: pytest-cov; extra == "dev"
|
|
|
107
119
|
</a>
|
|
108
120
|
<img src="https://github.com/astrogilda/tsbootstrap/workflows/CI/badge.svg" alt="Build Status"/>
|
|
109
121
|
<a href="https://codecov.io/gh/astrogilda/tsbootstrap"><img src="https://codecov.io/gh/astrogilda/tsbootstrap/branch/main/graph/badge.svg" alt="codecov"/></a>
|
|
110
|
-
<a href="https://doi.org/10.5281/zenodo.
|
|
122
|
+
<a href="https://doi.org/10.5281/zenodo.8226495"><img src="https://zenodo.org/badge/DOI/10.5281/zenodo.8226495.svg" alt="DOI"/></a>
|
|
111
123
|
<a href="https://codeclimate.com/github/astrogilda/tsbootstrap/maintainability"><img src="https://api.codeclimate.com/v1/badges/d80a0615a8c00f31565c/maintainability" alt="Code Quality"/></a>
|
|
112
124
|
<img src="https://img.shields.io/github/last-commit/astrogilda/tsbootstrap" alt="Last Commit"/>
|
|
113
125
|
<img src="https://img.shields.io/github/issues/astrogilda/tsbootstrap" alt="Issues"/>
|
|
@@ -118,94 +130,84 @@ Requires-Dist: pytest-cov; extra == "dev"
|
|
|
118
130
|
|
|
119
131
|
|
|
120
132
|
## 📒 Table of Contents
|
|
121
|
-
1. [📍 Time Series Bootstrapping](#time-series-bootstrapping)
|
|
122
|
-
- [Overview](#overview)
|
|
123
|
-
- [Bootstrapping Methodology](#bootstrapping-methodology)
|
|
124
|
-
- [Block Bootstrap](#block-bootstrap)
|
|
125
|
-
- [Moving Block Bootstrap](#moving-block-bootstrap)
|
|
126
|
-
- [Circular Block Bootstrap](#circular-block-bootstrap)
|
|
127
|
-
- [Stationary Block Bootstrap](#stationary-block-bootstrap)
|
|
128
|
-
- [NonOverlapping Block Bootstrap](#nonoverlapping-block-bootstrap)
|
|
129
|
-
- [Bartletts Bootstrap](#bartletts-bootstrap)
|
|
130
|
-
- [Blackman Bootstrap](#blackman-bootstrap)
|
|
131
|
-
- [Hamming Bootstrap](#hamming-bootstrap)
|
|
132
|
-
- [Hanning Bootstrap](#hanning-bootstrap)
|
|
133
|
-
- [Tukey Bootstrap](#tukey-bootstrap)
|
|
134
|
-
- [Residual Bootstrap](#residual-bootstrap)
|
|
135
|
-
- [Bias Corrected Bootstrap](#bias-corrected-bootstrap)
|
|
136
|
-
- [Distribution Bootstrap](#distribution-bootstrap)
|
|
137
|
-
- [Markov Bootstrap](#markov-bootstrap)
|
|
138
|
-
- [Sieve Bootstrap](#sieve-bootstrap)
|
|
139
|
-
3. [🧩 Modules](#-modules)
|
|
140
|
-
4. [🚀 Getting Started](#-getting-started)
|
|
141
|
-
5. [🗺 Roadmap](#-roadmap)
|
|
142
|
-
6. [🤝 Contributing](#-contributing)
|
|
143
|
-
7. [📄 License](#-license)
|
|
144
|
-
8. [👏 Acknowledgments](#-acknowledgments)
|
|
145
133
|
|
|
146
|
-
|
|
147
|
-
|
|
134
|
+
1. [🚀 Getting Started](#-getting-started)
|
|
135
|
+
2. [🧩 Modules](#-modules)
|
|
136
|
+
3. [🗺 Roadmap](#-roadmap)
|
|
137
|
+
4. [🤝 Contributing](#-contributing)
|
|
138
|
+
5. [📄 License](#-license)
|
|
139
|
+
6. [📍 Time Series Bootstrapping Methods intro](#time-series-bootstrapping)
|
|
140
|
+
7. [👏 Contributors](#-contributors)
|
|
148
141
|
|
|
149
142
|
|
|
150
|
-
## 📍 Time Series Bootstrapping
|
|
151
|
-
`tsbootstrap` is a comprehensive project designed to implement an array of bootstrapping techniques specifically tailored for time series data. This project is targeted towards data scientists, statisticians, economists, and other professionals or researchers who regularly work with time series data and require robust methods for generating bootstrapped copies of univariate and multivariate time series data.
|
|
152
143
|
|
|
153
|
-
|
|
154
|
-
Time series bootstrapping is a nuanced resampling method that is applied to time-dependent data. Traditional bootstrapping methods often assume independence between data points, which is an assumption that does not hold true for time series data where a data point is often dependent on previous data points. Time series bootstrapping techniques respect the chronological order and correlations of the data, providing more accurate estimates of uncertainty or variability.
|
|
144
|
+
---
|
|
155
145
|
|
|
156
|
-
|
|
157
|
-
The `tsbootstrap` project offers a diverse set of bootstrapping techniques that can be applied to either the entire input time series (classes prefixed with `Whole`), or after partitioning the data into blocks (classes prefixed with `Block`). These methodologies can be applied directly to the raw input data or to the residuals obtained after fitting one of the five statistical models defined in `time_series_model.py` (classes with `Residual` in their names).
|
|
146
|
+
## 🚀 Getting Started
|
|
158
147
|
|
|
159
|
-
### Block Bootstrap
|
|
160
|
-
Block Bootstrap is a prevalent approach in time series bootstrapping. It involves resampling blocks of consecutive data points, thus respecting the internal structures of the data. There are several techniques under Block Bootstrap, each with its unique approach. `tsbootstrap` provides highly flexible block bootstrapping, allowing the user to specify the block length sampling, block generation, and block resampling strategies. For additional details, refer to `block_length_sampler.py`, `block_generator.py`, and `block_resampler.py`.
|
|
161
148
|
|
|
162
|
-
|
|
149
|
+
### 🎮 Using tsbootstrap
|
|
163
150
|
|
|
164
|
-
|
|
151
|
+
`tsbootstrap` provides a unified, `sklearn`-like interface to all bootstrap methods.
|
|
165
152
|
|
|
166
|
-
|
|
153
|
+
Example using a `MovingBlockBootstrap` - all bootstrap algorithms follow
|
|
154
|
+
the same interface!
|
|
167
155
|
|
|
168
|
-
|
|
169
|
-
|
|
156
|
+
```python
|
|
157
|
+
from tsbootstrap import MovingBlockBootstrap
|
|
158
|
+
import numpy as np
|
|
170
159
|
|
|
171
|
-
|
|
172
|
-
|
|
160
|
+
# Create custom time series data. While below is for univariate time series, the bootstraps can handle multivariate time series as well.
|
|
161
|
+
n_samples = 10
|
|
162
|
+
X = np.arange(n_samples)
|
|
173
163
|
|
|
174
|
-
|
|
175
|
-
|
|
164
|
+
# Instantiate the bootstrap object
|
|
165
|
+
n_bootstraps = 3
|
|
166
|
+
block_length = 3
|
|
167
|
+
rng = 42
|
|
168
|
+
mbb = MovingBlockBootstrap(n_bootstraps=n_bootstraps, rng=rng, block_length=block_length)
|
|
169
|
+
|
|
170
|
+
# Generate bootstrapped samples
|
|
171
|
+
return_indices = False
|
|
172
|
+
bootstrapped_samples = mbb.bootstrap(
|
|
173
|
+
X, return_indices=return_indices)
|
|
174
|
+
|
|
175
|
+
# Collect bootstrap samples
|
|
176
|
+
X_bootstrapped = []
|
|
177
|
+
for data in bootstrapped_samples:
|
|
178
|
+
X_bootstrapped.append(data)
|
|
179
|
+
|
|
180
|
+
X_bootstrapped = np.array(X_bootstrapped)
|
|
181
|
+
```
|
|
176
182
|
|
|
177
|
-
|
|
178
|
-
This method is implemented in `NonOverlappingBlockBootstrap` and resamples blocks of data without overlap. It's useful when the data has dependencies that need to be preserved and when overfitting is a concern. It's not recommended when the data does not have any significant dependencies or when the introduction of bias due to non-overlapping selection is a concern.
|
|
183
|
+
### 📦 Installation and Setup
|
|
179
184
|
|
|
180
|
-
|
|
181
|
-
Bartlett's method is a time series bootstrap method that uses a window or filter that tapers off as you move away from the center of the window. It's useful when you have a large amount of data and you want to reduce the influence of the data points far away from the center. This method is not advised when the tapering of data points is not desired or when the dataset is small as the tapered data points might contain valuable information. It is implemented in `BartlettsBootstrap`.
|
|
185
|
+
``tsbootstrap`` is installed via ``pip``, either from PyPI or locally.
|
|
182
186
|
|
|
183
|
-
####
|
|
184
|
-
Similar to Bartlett's method, Blackman's method uses a window that tapers off as you move away from the center of the window. The key difference is the shape of the window (Blackman window has a different shape than Bartlett). It's useful when you want to reduce the influence of the data points far from the center with a different window shape. It's not recommended when the dataset is small or tapering of data points is not desired. It is implemented in `BlackmanBootstrap`.
|
|
187
|
+
#### ✔️ Prerequisites
|
|
185
188
|
|
|
186
|
-
|
|
187
|
-
|
|
189
|
+
- Python (3.8 or higher)
|
|
190
|
+
- `pip` (latest version recommended), plus a suitable environment manager (`venv`, `conda`)
|
|
188
191
|
|
|
189
|
-
|
|
190
|
-
This method also uses a specific type of window function. It's useful when you want to reduce the influence of the data points far from the center with the Hanning window shape. It's not recommended for small datasets or when tapering of data points is not desired. It is implemented in `HanningBootstrap`.
|
|
192
|
+
You can also consider using ``uv`` to speed up environment setup.
|
|
191
193
|
|
|
192
|
-
####
|
|
193
|
-
Similar to the Bartlett, Blackman, Hamming, and Hanning methods, the Tukey method uses a specific type of window function. It's useful when you want to reduce the influence of the data points far from the center with the Tukey window shape. It's not recommended for small datasets or when tapering of data points is not desired. It is implemented in `TukeyBootstrap`.
|
|
194
|
+
#### Installing from PyPI
|
|
194
195
|
|
|
195
|
-
|
|
196
|
-
Residual Bootstrap is a method designed for time series data where a model is fit to the data, and the residuals (the difference between the observed and predicted data) are bootstrapped. It's particularly useful when a good model fit is available for the data. However, it's not recommended when a model fit is not available or is poor. `tsbootstrap` provides four time series models to fit to the input data -- `AutoReg`, `ARIMA`, `SARIMA`, and `VAR` (for multivariate input time series data). For more details, refer to `time_series_model.py` and `tsfit.py`.
|
|
196
|
+
To install the latest release of `tsbootstrap` directly from PyPI, run:
|
|
197
197
|
|
|
198
|
-
|
|
199
|
-
|
|
198
|
+
```sh
|
|
199
|
+
pip install tsbootstrap
|
|
200
|
+
```
|
|
200
201
|
|
|
201
|
-
|
|
202
|
-
Distribution Bootstrap generates bootstrapped samples by fitting a distribution to the residuals and then generating new residuals from the fitted distribution. The new residuals are then added to the fitted values to create the bootstrapped samples. This method is based on the assumption that the residuals follow a specific distribution (like Gaussian, Poisson, etc). It's not recommended when the distribution of residuals is unknown or hard to determine. It is implemented in `DistributionBootstrap`.
|
|
202
|
+
To install with all optional dependencies:
|
|
203
203
|
|
|
204
|
-
|
|
205
|
-
|
|
204
|
+
```
|
|
205
|
+
pip install "tsbootstrap[all_extras]"
|
|
206
|
+
```
|
|
207
|
+
---
|
|
206
208
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
+
Bootstrap algorithms manage their own dependencies - if an extra is needed but not
|
|
210
|
+
present, the object will raise an error at construction.
|
|
209
211
|
|
|
210
212
|
## 🧩 Modules
|
|
211
213
|
The `tsbootstrap` package contains various modules that handle tasks such as bootstrapping, time series simulation, and utility functions. This modular approach ensures flexibility, extensibility, and ease of maintenance.
|
|
@@ -219,7 +221,7 @@ The `tsbootstrap` package contains various modules that handle tasks such as boo
|
|
|
219
221
|
| [commitlint.config.js](https://github.com/astrogilda/tsbootstrap/blob/main/commitlint.config.js) | Configuration for enforcing conventional commit messages. |
|
|
220
222
|
| [CITATION.cff](https://github.com/astrogilda/tsbootstrap/blob/main/CITATION.cff) | Citation metadata for the project. |
|
|
221
223
|
| [CODE_OF_CONDUCT.md](https://github.com/astrogilda/tsbootstrap/blob/main/CODE_OF_CONDUCT.md) | Guidelines for community conduct and interactions. |
|
|
222
|
-
| [CONTRIBUTING.md](https://github.com/astrogilda/tsbootstrap/blob/main/
|
|
224
|
+
| [CONTRIBUTING.md](https://github.com/astrogilda/tsbootstrap/blob/main/CONTRIBUTING.md) | Instructions for contributing to the project. |
|
|
223
225
|
| [.codeclimate.yml](https://github.com/astrogilda/tsbootstrap/blob/main/.codeclimate.yml) | Configuration for Code Climate quality checks. |
|
|
224
226
|
| [.gitignore](https://github.com/astrogilda/tsbootstrap/blob/main/.gitignore) | Specifies files and folders to be ignored by Git. |
|
|
225
227
|
| [.pre-commit-config.yaml](https://github.com/astrogilda/tsbootstrap/blob/main/.pre-commit-config.yaml) | Configuration for pre-commit hooks. |
|
|
@@ -261,108 +263,62 @@ The `tsbootstrap` package contains various modules that handle tasks such as boo
|
|
|
261
263
|
|
|
262
264
|
</details>
|
|
263
265
|
|
|
264
|
-
---
|
|
265
266
|
|
|
266
|
-
##
|
|
267
|
+
## 🗺 Roadmap
|
|
267
268
|
|
|
268
|
-
|
|
269
|
+
This is an abridged version; for the complete and evolving list of plans and improvements, see [Issue #144](https://github.com/astrogilda/tsbootstrap/issues/144).
|
|
269
270
|
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
271
|
+
- **Performance and Scaling**: handling large datasets, distributed backend integration (`Dask`, `Spark`, `Ray`), profiling/optimization
|
|
272
|
+
- **Tuning and AutoML**: adaptive block length, adaptive resampling, evaluation based parameter selection
|
|
273
|
+
- **Real-time and Stream Data**: stream bootstraps, data update interface
|
|
274
|
+
- **Stage 2 `sktime` Integration**: evaluation module, datasets, benchmarks, sktime forecasters in bootstraps
|
|
275
|
+
- **API and Capability Extension**: panel/hierarchical data, exogenous data, update/stream, model state management
|
|
276
|
+
- **Scope Extension (TBD)**: time series augmentation, fully probabilistic models
|
|
274
277
|
|
|
275
|
-
|
|
278
|
+
## 🤝 Contributing
|
|
276
279
|
|
|
277
|
-
|
|
280
|
+
Contributions are always welcome!
|
|
278
281
|
|
|
279
|
-
|
|
282
|
+
See our [good first issues](https://github.com/astrogilda/tsbootstrap/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)
|
|
283
|
+
for getting started.
|
|
280
284
|
|
|
281
|
-
|
|
282
|
-
- [Python](https://www.python.org/downloads/)
|
|
283
|
-
- [Poetry](https://python-poetry.org/docs/#installation)
|
|
284
|
-
- [Bash](https://www.gnu.org/software/bash/)
|
|
285
|
+
Below is a quick start guide to contributing.
|
|
285
286
|
|
|
286
|
-
|
|
287
|
-
```sh
|
|
288
|
-
git clone https://github.com/astrogilda/tsbootstrap
|
|
289
|
-
```
|
|
287
|
+
### Developer setup
|
|
290
288
|
|
|
291
|
-
|
|
292
|
-
```sh
|
|
293
|
-
cd tsbootstrap
|
|
294
|
-
```
|
|
289
|
+
1. Fork the tsbootstrap repository
|
|
295
290
|
|
|
296
|
-
|
|
291
|
+
2. Clone the fork to local:
|
|
297
292
|
```sh
|
|
298
|
-
|
|
293
|
+
git clone https://github.com/astrogilda/tsbootstrap
|
|
299
294
|
```
|
|
300
295
|
|
|
301
|
-
|
|
302
|
-
```sh
|
|
303
|
-
./setup.sh
|
|
304
|
-
```
|
|
296
|
+
3. In the local repository root, set up a python environment, e.g., `venv` or `conda`.
|
|
305
297
|
|
|
306
|
-
The `setup.sh` script sets up a Python environment using Poetry, locks and installs the necessary dependencies, and installs `dtaidistance` if the Python version is 3.9 or lower.
|
|
307
298
|
|
|
308
|
-
|
|
309
|
-
```
|
|
310
|
-
|
|
299
|
+
4. Editable install via `pip`, including developer dependencies:
|
|
300
|
+
```
|
|
301
|
+
pip install -e ".[dev]"
|
|
311
302
|
```
|
|
312
303
|
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
### 🎮 Using tsbootstrap
|
|
316
|
-
|
|
317
|
-
Here's a basic example using the Moving Block Bootstrap method:
|
|
318
|
-
|
|
319
|
-
```python
|
|
320
|
-
from tsbootstrap import MovingBlockBootstrap, MovingBlockBootstrapConfig
|
|
321
|
-
import numpy as np
|
|
322
|
-
|
|
323
|
-
np.random.seed(0)
|
|
324
|
-
|
|
325
|
-
# Create custom time series data
|
|
326
|
-
|
|
327
|
-
n_samples = 1000
|
|
328
|
-
|
|
329
|
-
y = np.random.normal(0, 1, n_samples).cumsum()
|
|
330
|
-
|
|
331
|
-
x1 = np.arange(1, n_samples + 1).reshape(-1, 1)
|
|
332
|
-
x2 = np.random.normal(0, 1, (n_samples, 1))
|
|
333
|
-
exog = np.concatenate([x1, x2], axis=1)
|
|
304
|
+
The editable install ensures that changes to the package are reflected in
|
|
305
|
+
your environment.
|
|
334
306
|
|
|
335
|
-
|
|
336
|
-
mbb_config = MovingBlockBootstrapConfig(
|
|
337
|
-
n_bootstraps=1000, rng=42, block_length=10
|
|
338
|
-
)
|
|
339
|
-
mbb = MovingBlockBootstrap(config=mbb_config)
|
|
307
|
+
### Verifying the Installation
|
|
340
308
|
|
|
341
|
-
|
|
342
|
-
bootstrapped_samples = bootstrap.bootstrap(n=1000)
|
|
309
|
+
After installation, you can verify that tsbootstrap has been installed correctly by checking its version or by trying to import it in Python:
|
|
343
310
|
```
|
|
344
|
-
|
|
345
|
-
### 🧪 Running Tests
|
|
346
|
-
```sh
|
|
347
|
-
pytest tests/
|
|
311
|
+
python -c "import tsbootstrap; print(tsbootstrap.__version__)"
|
|
348
312
|
```
|
|
349
313
|
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
## 🗺 Roadmap
|
|
354
|
-
|
|
355
|
-
> - [ ] `ℹ️ Task 1: in distributionbootstrap, allow mixture of distributions`
|
|
356
|
-
> - [ ] `ℹ️ Task 2: allow fractional block_length`
|
|
357
|
-
> - [ ] `ℹ️ Task 3: enable multi-processing`
|
|
358
|
-
> - [ ] `ℹ️ Task 4: test -- for biascorrectblockbootstrap, see if the statistic on the bootstrapped sample is close to the statistic on the original sample`
|
|
314
|
+
This command should output the version number of tsbootstrap without any errors, indicating that the installation was successful.
|
|
359
315
|
|
|
316
|
+
That's it! You are now set up and ready to go. You can start using tsbootstrap for your time series bootstrapping needs.
|
|
360
317
|
|
|
361
|
-
|
|
318
|
+
### Contribution workflow
|
|
362
319
|
|
|
363
320
|
Contributions are always welcome! Please follow these steps:
|
|
364
|
-
|
|
365
|
-
2. Clone the forked repository to your local machine using a Git client like Git or GitHub Desktop.
|
|
321
|
+
|
|
366
322
|
3. Create a new branch with a descriptive name (e.g., `new-feature-branch` or `bugfix-issue-123`).
|
|
367
323
|
```sh
|
|
368
324
|
git checkout -b new-feature-branch
|
|
@@ -379,6 +335,25 @@ git push origin new-feature-branch
|
|
|
379
335
|
7. Create a new pull request to the original project repository. In the pull request, describe the changes you've made and why they're necessary.
|
|
380
336
|
The project maintainers will review your changes and provide feedback or merge them into the main branch.
|
|
381
337
|
|
|
338
|
+
### 🧪 Running Tests
|
|
339
|
+
|
|
340
|
+
To run all tests, in your developer environment, run:
|
|
341
|
+
|
|
342
|
+
```sh
|
|
343
|
+
pytest tests/
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
Individual bootstrap algorithms can be tested as follows:
|
|
347
|
+
|
|
348
|
+
```python
|
|
349
|
+
from tsbootstrap.utils import check_estimator
|
|
350
|
+
|
|
351
|
+
check_estimator(my_bootstrap_algo)
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
### Contribution guide
|
|
355
|
+
|
|
356
|
+
For more detailed information on how to contribute, please refer to our [CONTRIBUTING.md](https://github.com/astrogilda/tsbootstrap/blob/main/CONTRIBUTING.md) guide.
|
|
382
357
|
---
|
|
383
358
|
|
|
384
359
|
## 📄 License
|
|
@@ -400,3 +375,67 @@ Thanks goes to these wonderful people:
|
|
|
400
375
|
<!-- ALL-CONTRIBUTORS-LIST:END -->
|
|
401
376
|
|
|
402
377
|
This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
---
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
## 📍 Time Series Bootstrapping
|
|
384
|
+
`tsbootstrap` is a comprehensive project designed to implement an array of bootstrapping techniques specifically tailored for time series data. This project is targeted towards data scientists, statisticians, economists, and other professionals or researchers who regularly work with time series data and require robust methods for generating bootstrapped copies of univariate and multivariate time series data.
|
|
385
|
+
|
|
386
|
+
### Overview
|
|
387
|
+
Time series bootstrapping is a nuanced resampling method that is applied to time-dependent data. Traditional bootstrapping methods often assume independence between data points, which is an assumption that does not hold true for time series data where a data point is often dependent on previous data points. Time series bootstrapping techniques respect the chronological order and correlations of the data, providing more accurate estimates of uncertainty or variability.
|
|
388
|
+
|
|
389
|
+
### Bootstrapping Methodology
|
|
390
|
+
The `tsbootstrap` project offers a diverse set of bootstrapping techniques that can be applied to either the entire input time series (classes prefixed with `Whole`), or after partitioning the data into blocks (classes prefixed with `Block`). These methodologies can be applied directly to the raw input data or to the residuals obtained after fitting one of the five statistical models defined in `time_series_model.py` (classes with `Residual` in their names).
|
|
391
|
+
|
|
392
|
+
### Block Bootstrap
|
|
393
|
+
Block Bootstrap is a prevalent approach in time series bootstrapping. It involves resampling blocks of consecutive data points, thus respecting the internal structures of the data. There are several techniques under Block Bootstrap, each with its unique approach. `tsbootstrap` provides highly flexible block bootstrapping, allowing the user to specify the block length sampling, block generation, and block resampling strategies. For additional details, refer to `block_length_sampler.py`, `block_generator.py`, and `block_resampler.py`.
|
|
394
|
+
|
|
395
|
+
The Moving Block Bootstrap, Circular Block Bootstrap, Stationary Block Bootstrap, and NonOverlapping Block Bootstrap methods are all variations of the Block Bootstrap that use different methods to sample the data, maintaining various types of dependencies.
|
|
396
|
+
|
|
397
|
+
Bartlett's, Blackman's, Hamming's, Hanning's, and Tukey's Bootstrap methods are specific implementations of the Block Bootstrap that use different window shapes to taper the data, reducing the influence of data points far from the center. In `tsbootstrap`, these methods inherit from `MovingBlockBootstrap`, but can easily be modified to inherit from any of the other three base block bootstrapping classes.
|
|
398
|
+
|
|
399
|
+
Each method comes with its distinct strengths and weaknesses. The choice of method should be based on the characteristics of the data and the specific requirements of the analysis.
|
|
400
|
+
|
|
401
|
+
#### (i) Moving Block Bootstrap
|
|
402
|
+
This method is implemented in `MovingBlockBootstrap` and is used for time series data where blocks of data are resampled to maintain the dependency structure within the blocks. It's useful when the data has dependencies that need to be preserved. It's not recommended when the data does not have any significant dependencies.
|
|
403
|
+
|
|
404
|
+
#### (ii) Circular Block Bootstrap
|
|
405
|
+
This method is implemented in `CircularBlockBootstrap` and treats the data as if it is circular (the end of the data is next to the beginning of the data). It's useful when the data is cyclical or seasonal in nature. It's not recommended when the data does not have a cyclical or seasonal component.
|
|
406
|
+
|
|
407
|
+
#### (iii) Stationary Block Bootstrap
|
|
408
|
+
This method is implemented in `StationaryBlockBootstrap` and randomly resamples blocks of data with block lengths that follow a geometric distribution. It's useful for time series data where the degree of dependency needs to be preserved, and it doesn't require strict stationarity of the underlying process. It's not recommended when the data has strong seasonality or trend components which violate the weak dependence assumption.
|
|
409
|
+
|
|
410
|
+
#### (iv) NonOverlapping Block Bootstrap
|
|
411
|
+
This method is implemented in `NonOverlappingBlockBootstrap` and resamples blocks of data without overlap. It's useful when the data has dependencies that need to be preserved and when overfitting is a concern. It's not recommended when the data does not have any significant dependencies or when the introduction of bias due to non-overlapping selection is a concern.
|
|
412
|
+
|
|
413
|
+
#### (v) Bartlett's Bootstrap
|
|
414
|
+
Bartlett's method is a time series bootstrap method that uses a window or filter that tapers off as you move away from the center of the window. It's useful when you have a large amount of data and you want to reduce the influence of the data points far away from the center. This method is not advised when the tapering of data points is not desired or when the dataset is small as the tapered data points might contain valuable information. It is implemented in `BartlettsBootstrap`.
|
|
415
|
+
|
|
416
|
+
#### (vi) Blackman Bootstrap
|
|
417
|
+
Similar to Bartlett's method, Blackman's method uses a window that tapers off as you move away from the center of the window. The key difference is the shape of the window (the Blackman window has a different shape than the Bartlett window). It's useful when you want to reduce the influence of the data points far from the center with a different window shape. It's not recommended when the dataset is small or tapering of data points is not desired. It is implemented in `BlackmanBootstrap`.
|
|
418
|
+
|
|
419
|
+
#### (vii) Hamming Bootstrap
|
|
420
|
+
Similar to the Bartlett and Blackman methods, the Hamming method uses a specific type of window function. It's useful when you want to reduce the influence of the data points far from the center with the Hamming window shape. It's not recommended for small datasets or when tapering of data points is not desired. It is implemented in `HammingBootstrap`.
|
|
421
|
+
|
|
422
|
+
#### (viii) Hanning Bootstrap
|
|
423
|
+
This method also uses a specific type of window function. It's useful when you want to reduce the influence of the data points far from the center with the Hanning window shape. It's not recommended for small datasets or when tapering of data points is not desired. It is implemented in `HanningBootstrap`.
|
|
424
|
+
|
|
425
|
+
#### (ix) Tukey Bootstrap
|
|
426
|
+
Similar to the Bartlett, Blackman, Hamming, and Hanning methods, the Tukey method uses a specific type of window function. It's useful when you want to reduce the influence of the data points far from the center with the Tukey window shape. It's not recommended for small datasets or when tapering of data points is not desired. It is implemented in `TukeyBootstrap`.
|
|
427
|
+
|
|
428
|
+
### Residual Bootstrap
|
|
429
|
+
Residual Bootstrap is a method designed for time series data where a model is fit to the data, and the residuals (the difference between the observed and predicted data) are bootstrapped. It's particularly useful when a good model fit is available for the data. However, it's not recommended when a model fit is not available or is poor. `tsbootstrap` provides five time series models to fit to the input data -- `AutoReg`, `ARIMA`, `SARIMA`, `VAR` (for multivariate input time series data), and `ARCH`. For more details, refer to `time_series_model.py` and `tsfit.py`.
|
|
430
|
+
|
|
431
|
+
### Statistic-Preserving Bootstrap
|
|
432
|
+
Statistic-Preserving Bootstrap is a unique method designed to generate bootstrapped time series data while preserving a specific statistic of the original data. This method can be beneficial in scenarios where it's important to maintain the original data's characteristics in the bootstrapped samples. It is implemented in `StatisticPreservingBootstrap`.
|
|
433
|
+
|
|
434
|
+
### Distribution Bootstrap
|
|
435
|
+
Distribution Bootstrap generates bootstrapped samples by fitting a distribution to the residuals and then generating new residuals from the fitted distribution. The new residuals are then added to the fitted values to create the bootstrapped samples. This method is based on the assumption that the residuals follow a specific distribution (like Gaussian, Poisson, etc). It's not recommended when the distribution of residuals is unknown or hard to determine. It is implemented in `DistributionBootstrap`.
|
|
436
|
+
|
|
437
|
+
### Markov Bootstrap
|
|
438
|
+
Markov Bootstrap is used for bootstrapping time series data where the residuals of the data are presumed to follow a Markov process. This method is especially useful in scenarios where the current residual primarily depends on the previous one, with little to no dependency on residuals from further in the past. The Markov Bootstrap technique is designed to preserve this dependency structure in the bootstrapped samples, making it particularly valuable for time series data that exhibits Markov properties. However, it's not advisable when the residuals of the time series data exhibit long-range dependencies, as the Markov assumption of limited dependency may not hold true. It is implemented in `MarkovBootstrap`. See `markov_sampler.py` for implementation details.
|
|
439
|
+
|
|
440
|
+
### Sieve Bootstrap
|
|
441
|
+
Sieve Bootstrap is designed for handling dependent data, where the residuals of the time series data follow an autoregressive process. This method aims to preserve and simulate the dependencies inherent in the original data within the bootstrapped samples. It operates by approximating the autoregressive process of the residuals using a finite-order autoregressive model. The order of the model is determined based on the data, and the residuals are then bootstrapped. The Sieve Bootstrap technique is particularly valuable for time series data that exhibits autoregressive properties. However, it's not advisable when the residuals of the time series data do not follow an autoregressive process. It is implemented in `SieveBootstrap`. See `time_series_simulator.py` for implementation details.
|