tsbootstrap 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. {tsbootstrap-0.1.0/src/tsbootstrap.egg-info → tsbootstrap-0.1.2}/PKG-INFO +189 -150
  2. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/README.md +170 -143
  3. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/pyproject.toml +38 -47
  4. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/base_bootstrap.py +166 -49
  5. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/base_bootstrap_configs.py +4 -4
  6. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/block_bootstrap.py +162 -86
  7. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/block_bootstrap_configs.py +15 -14
  8. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/block_generator.py +43 -31
  9. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/block_length_sampler.py +2 -2
  10. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/block_resampler.py +140 -94
  11. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/bootstrap.py +67 -87
  12. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/markov_sampler.py +40 -36
  13. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/ranklags.py +7 -1
  14. tsbootstrap-0.1.2/src/tsbootstrap/registry/tests/test_tags.py +46 -0
  15. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/test_all_bootstraps.py +41 -4
  16. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/time_series_model.py +17 -17
  17. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/time_series_simulator.py +12 -12
  18. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tsfit.py +4 -2
  19. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/utils/odds_and_ends.py +6 -3
  20. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/utils/types.py +19 -6
  21. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/utils/validate.py +18 -16
  22. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2/src/tsbootstrap.egg-info}/PKG-INFO +189 -150
  23. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap.egg-info/requires.txt +21 -7
  24. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_base_bootstrap_configs.py +0 -1
  25. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_block_bootstrap.py +1 -2
  26. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_block_generator.py +3 -3
  27. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_block_resampler.py +1 -16
  28. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_time_series_model.py +0 -1
  29. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_time_series_simulator.py +35 -6
  30. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_tsfit.py +0 -3
  31. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_validate.py +2 -1
  32. tsbootstrap-0.1.0/src/tsbootstrap/registry/tests/test_tags.py +0 -24
  33. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/LICENSE +0 -0
  34. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/setup.cfg +0 -0
  35. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/__init__.py +0 -0
  36. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/py.typed +0 -0
  37. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/registry/__init__.py +0 -0
  38. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/registry/_lookup.py +0 -0
  39. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/registry/_tags.py +0 -0
  40. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/registry/tests/__init__.py +0 -0
  41. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/__init__.py +0 -0
  42. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/scenarios/__init__.py +0 -0
  43. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/scenarios/scenarios.py +0 -0
  44. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/scenarios/scenarios_bootstrap.py +0 -0
  45. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/scenarios/scenarios_getter.py +0 -0
  46. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/test_all_estimators.py +0 -0
  47. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/test_class_register.py +0 -0
  48. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/tests/test_switch.py +0 -0
  49. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/utils/__init__.py +0 -0
  50. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/utils/dependencies.py +0 -0
  51. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap/utils/estimator_checks.py +0 -0
  52. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap.egg-info/SOURCES.txt +0 -0
  53. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap.egg-info/dependency_links.txt +0 -0
  54. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/src/tsbootstrap.egg-info/top_level.txt +0 -0
  55. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_block_bootstrap_configs.py +0 -0
  56. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_block_length_sampler.py +0 -0
  57. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_bootstrap.py +0 -0
  58. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_markov_sampler.py +0 -0
  59. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_odds_and_ends.py +0 -0
  60. {tsbootstrap-0.1.0 → tsbootstrap-0.1.2}/tests/test_ranklags.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tsbootstrap
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: A Python package to generate bootstrapped time series
5
5
  Author-email: Sankalp Gilda <sankalp.gilda@gmail.com>
6
6
  Maintainer-email: Sankalp Gilda <sankalp.gilda@gmail.com>, Franz Kiraly <franz.kiraly@sktime.net>, Benedikt Heidrich <benedikt.heidrich@sktime.net>
@@ -43,16 +43,17 @@ Requires-Python: <3.13,>=3.8
43
43
  Description-Content-Type: text/markdown
44
44
  License-File: LICENSE
45
45
  Requires-Dist: numpy<1.27,>=1.21
46
- Requires-Dist: scikit-base<0.8.0,>=0.6.1
47
- Requires-Dist: scikit-learn<1.5.0,>=0.24
46
+ Requires-Dist: scikit-base<=0.9.0,>=0.8.0
47
+ Requires-Dist: scikit-learn<=1.5.1,>=0.24
48
48
  Requires-Dist: scipy<2.0.0,>=1.2
49
49
  Requires-Dist: packaging
50
50
  Provides-Extra: all-extras
51
- Requires-Dist: arch<6.0.0,>=5.0.0; extra == "all-extras"
51
+ Requires-Dist: arch<=7.0.0,>=5.0.0; extra == "all-extras"
52
52
  Requires-Dist: hmmlearn<0.3.2,>=0.3.0; extra == "all-extras"
53
53
  Requires-Dist: pyclustering<0.11.0,>=0.10.0; extra == "all-extras"
54
54
  Requires-Dist: scikit_learn_extra<0.4.0,>=0.3.0; extra == "all-extras"
55
55
  Requires-Dist: statsmodels<0.15.0,>=0.12.1; extra == "all-extras"
56
+ Requires-Dist: dtaidistance; python_version < "3.10" and extra == "all-extras"
56
57
  Provides-Extra: docs
57
58
  Requires-Dist: furo; extra == "docs"
58
59
  Requires-Dist: jupyter; extra == "docs"
@@ -62,19 +63,30 @@ Requires-Dist: numpydoc; extra == "docs"
62
63
  Requires-Dist: pydata-sphinx-theme; extra == "docs"
63
64
  Requires-Dist: Sphinx!=7.2.0,<8.0.0; extra == "docs"
64
65
  Requires-Dist: sphinx-rtd-theme>=1.3.0; extra == "docs"
65
- Requires-Dist: sphinx-copybutton; extra == "docs"
66
+ Requires-Dist: sphinx-copybutton>=0.5.2; extra == "docs"
66
67
  Requires-Dist: sphinx-design<0.6.0; extra == "docs"
67
68
  Requires-Dist: sphinx-gallery<0.15.0; extra == "docs"
68
69
  Requires-Dist: sphinx-issues<4.0.0; extra == "docs"
69
70
  Requires-Dist: sphinx-version-warning; extra == "docs"
70
- Requires-Dist: tabulate; extra == "docs"
71
+ Requires-Dist: tabulate>=0.9.0; extra == "docs"
71
72
  Provides-Extra: dev
72
- Requires-Dist: black; extra == "dev"
73
+ Requires-Dist: black>=24.3.0; extra == "dev"
73
74
  Requires-Dist: blacken-docs; extra == "dev"
74
75
  Requires-Dist: hypothesis; extra == "dev"
75
76
  Requires-Dist: pre-commit; extra == "dev"
76
77
  Requires-Dist: pytest; extra == "dev"
77
78
  Requires-Dist: pytest-cov; extra == "dev"
79
+ Requires-Dist: github-actions; extra == "dev"
80
+ Requires-Dist: importlib-metadata; extra == "dev"
81
+ Requires-Dist: pip-tools; extra == "dev"
82
+ Requires-Dist: pyright; extra == "dev"
83
+ Requires-Dist: ruff; extra == "dev"
84
+ Requires-Dist: autoflake; extra == "dev"
85
+ Requires-Dist: typos; extra == "dev"
86
+ Requires-Dist: tox; extra == "dev"
87
+ Requires-Dist: tox-gh-actions; extra == "dev"
88
+ Requires-Dist: pycobertura; extra == "dev"
89
+ Requires-Dist: tomlkit; extra == "dev"
78
90
 
79
91
  <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
80
92
  [![All Contributors](https://img.shields.io/github/all-contributors/astrogilda/tsbootstrap?color=ee8449&style=flat-square)](#contributors)
@@ -107,7 +119,7 @@ Requires-Dist: pytest-cov; extra == "dev"
107
119
  </a>
108
120
  <img src="https://github.com/astrogilda/tsbootstrap/workflows/CI/badge.svg" alt="Build Status"/>
109
121
  <a href="https://codecov.io/gh/astrogilda/tsbootstrap"><img src="https://codecov.io/gh/astrogilda/tsbootstrap/branch/main/graph/badge.svg" alt="codecov"/></a>
110
- <a href="https://doi.org/10.5281/zenodo.8226496"><img src="https://zenodo.org/badge/DOI/10.5281/zenodo.8226496.svg" alt="DOI"/></a>
122
+ <a href="https://doi.org/10.5281/zenodo.8226495"><img src="https://zenodo.org/badge/DOI/10.5281/zenodo.8226495.svg" alt="DOI"/></a>
111
123
  <a href="https://codeclimate.com/github/astrogilda/tsbootstrap/maintainability"><img src="https://api.codeclimate.com/v1/badges/d80a0615a8c00f31565c/maintainability" alt="Code Quality"/></a>
112
124
  <img src="https://img.shields.io/github/last-commit/astrogilda/tsbootstrap" alt="Last Commit"/>
113
125
  <img src="https://img.shields.io/github/issues/astrogilda/tsbootstrap" alt="Issues"/>
@@ -118,94 +130,84 @@ Requires-Dist: pytest-cov; extra == "dev"
118
130
 
119
131
 
120
132
  ## 📒 Table of Contents
121
- 1. [📍 Time Series Bootstrapping](#time-series-bootstrapping)
122
- - [Overview](#overview)
123
- - [Bootstrapping Methodology](#bootstrapping-methodology)
124
- - [Block Bootstrap](#block-bootstrap)
125
- - [Moving Block Bootstrap](#moving-block-bootstrap)
126
- - [Circular Block Bootstrap](#circular-block-bootstrap)
127
- - [Stationary Block Bootstrap](#stationary-block-bootstrap)
128
- - [NonOverlapping Block Bootstrap](#nonoverlapping-block-bootstrap)
129
- - [Bartletts Bootstrap](#bartletts-bootstrap)
130
- - [Blackman Bootstrap](#blackman-bootstrap)
131
- - [Hamming Bootstrap](#hamming-bootstrap)
132
- - [Hanning Bootstrap](#hanning-bootstrap)
133
- - [Tukey Bootstrap](#tukey-bootstrap)
134
- - [Residual Bootstrap](#residual-bootstrap)
135
- - [Bias Corrected Bootstrap](#bias-corrected-bootstrap)
136
- - [Distribution Bootstrap](#distribution-bootstrap)
137
- - [Markov Bootstrap](#markov-bootstrap)
138
- - [Sieve Bootstrap](#sieve-bootstrap)
139
- 3. [🧩 Modules](#-modules)
140
- 4. [🚀 Getting Started](#-getting-started)
141
- 5. [🗺 Roadmap](#-roadmap)
142
- 6. [🤝 Contributing](#-contributing)
143
- 7. [📄 License](#-license)
144
- 8. [👏 Acknowledgments](#-acknowledgments)
145
133
 
146
-
147
- ---
134
+ 1. [🚀 Getting Started](#-getting-started)
135
+ 2. [🧩 Modules](#-modules)
136
+ 3. [🗺 Roadmap](#-roadmap)
137
+ 4. [🤝 Contributing](#-contributing)
138
+ 5. [📄 License](#-license)
139
+ 6. [📍 Time Series Bootstrapping Methods intro](#time-series-bootstrapping)
140
+ 7. [👏 Contributors](#-contributors)
148
141
 
149
142
 
150
- ## 📍 Time Series Bootstrapping
151
- `tsbootstrap` is a comprehensive project designed to implement an array of bootstrapping techniques specifically tailored for time series data. This project is targeted towards data scientists, statisticians, economists, and other professionals or researchers who regularly work with time series data and require robust methods for generating bootstrapped copies of univariate and multivariate time series data.
152
143
 
153
- ### Overview
154
- Time series bootstrapping is a nuanced resampling method that is applied to time-dependent data. Traditional bootstrapping methods often assume independence between data points, which is an assumption that does not hold true for time series data where a data point is often dependent on previous data points. Time series bootstrapping techniques respect the chronological order and correlations of the data, providing more accurate estimates of uncertainty or variability.
144
+ ---
155
145
 
156
- ### Bootstrapping Methodology
157
- The `tsbootstrap` project offers a diverse set of bootstrapping techniques that can be applied to either the entire input time series (classes prefixed with `Whole`), or after partitioning the data into blocks (classes prefixed with `Block`). These methodologies can be applied directly to the raw input data or to the residuals obtained after fitting one of the five statistical models defined in `time_series_model.py` (classes with `Residual` in their names).
146
+ ## 🚀 Getting Started
158
147
 
159
- ### Block Bootstrap
160
- Block Bootstrap is a prevalent approach in time series bootstrapping. It involves resampling blocks of consecutive data points, thus respecting the internal structures of the data. There are several techniques under Block Bootstrap, each with its unique approach. `tsbootstrap` provides highly flexible block bootstrapping, allowing the user to specify the block length sampling, block generation, and block resampling strategies. For additional details, refer to `block_length_sampler.py`, `block_generator.py`, and `block_resampler.py`.
161
148
 
162
- The Moving Block Bootstrap, Circular Block Bootstrap, Stationary Block Bootstrap, and NonOverlapping Block Bootstrap methods are all variations of the Block Bootstrap that use different methods to sample the data, maintaining various types of dependencies.
149
+ ### 🎮 Using tsbootstrap
163
150
 
164
- Bartlett's, Blackman's, Hamming's, Hanning's, and Tukey's Bootstrap methods are specific implementations of the Block Bootstrap that use different window shapes to taper the data, reducing the influence of data points far from the center. In `tsbootstrap`, these methods inherit from `MovingBlockBootstrap`, but can easily be modified to inherit from any of the other three base block bootstrapping classes.
151
+ `tsbootstrap` provides a unified, `sklearn`-like interface to all bootstrap methods.
165
152
 
166
- Each method comes with its distinct strengths and weaknesses. The choice of method should be based on the characteristics of the data and the specific requirements of the analysis.
153
+ Example using a `MovingBlockBootstrap` - all bootstrap algorithms follow
154
+ the same interface!
167
155
 
168
- #### (i) Moving Block Bootstrap
169
- This method is implemented in `MovingBlockBootstrap` and is used for time series data where blocks of data are resampled to maintain the dependency structure within the blocks. It's useful when the data has dependencies that need to be preserved. It's not recommended when the data does not have any significant dependencies.
156
+ ```python
157
+ from tsbootstrap import MovingBlockBootstrap
158
+ import numpy as np
170
159
 
171
- #### (ii) Circular Block Bootstrap
172
- This method is implemented in `CircularBlockBootstrap` and treats the data as if it is circular (the end of the data is next to the beginning of the data). It's useful when the data is cyclical or seasonal in nature. It's not recommended when the data does not have a cyclical or seasonal component.
160
+ # Create custom time series data. The example below uses a univariate time series, but the bootstraps can handle multivariate time series as well.
161
+ n_samples = 10
162
+ X = np.arange(n_samples)
173
163
 
174
- #### (iii) Stationary Block Bootstrap
175
- This method is implemented in `StationaryBlockBootstrap` and randomly resamples blocks of data with block lengths that follow a geometric distribution. It's useful for time series data where the degree of dependency needs to be preserved, and it doesn't require strict stationarity of the underlying process. It's not recommended when the data has strong seasonality or trend components which violate the weak dependence assumption.
164
+ # Instantiate the bootstrap object
165
+ n_bootstraps = 3
166
+ block_length = 3
167
+ rng = 42
168
+ mbb = MovingBlockBootstrap(n_bootstraps=n_bootstraps, rng=rng, block_length=block_length)
169
+
170
+ # Generate bootstrapped samples
171
+ return_indices = False
172
+ bootstrapped_samples = mbb.bootstrap(
173
+ X, return_indices=return_indices)
174
+
175
+ # Collect bootstrap samples
176
+ X_bootstrapped = []
177
+ for data in bootstrapped_samples:
178
+ X_bootstrapped.append(data)
179
+
180
+ X_bootstrapped = np.array(X_bootstrapped)
181
+ ```
176
182
 
177
- #### (iv) NonOverlapping Block Bootstrap
178
- This method is implemented in `NonOverlappingBlockBootstrap` and resamples blocks of data without overlap. It's useful when the data has dependencies that need to be preserved and when overfitting is a concern. It's not recommended when the data does not have any significant dependencies or when the introduction of bias due to non-overlapping selection is a concern.
183
+ ### 📦 Installation and Setup
179
184
 
180
- #### (v) Bartlett's Bootstrap
181
- Bartlett's method is a time series bootstrap method that uses a window or filter that tapers off as you move away from the center of the window. It's useful when you have a large amount of data and you want to reduce the influence of the data points far away from the center. This method is not advised when the tapering of data points is not desired or when the dataset is small as the tapered data points might contain valuable information. It is implemented in `BartlettsBootstrap`.
185
+ ``tsbootstrap`` is installed via ``pip``, either from PyPI or locally.
182
186
 
183
- #### (vi) Blackman Bootstrap
184
- Similar to Bartlett's method, Blackman's method uses a window that tapers off as you move away from the center of the window. The key difference is the shape of the window (Blackman window has a different shape than Bartlett). It's useful when you want to reduce the influence of the data points far from the center with a different window shape. It's not recommended when the dataset is small or tapering of data points is not desired. It is implemented in `BlackmanBootstrap`.
187
+ #### ✔️ Prerequisites
185
188
 
186
- #### (vii) Hamming Bootstrap
187
- Similar to the Bartlett and Blackman methods, the Hamming method uses a specific type of window function. It's useful when you want to reduce the influence of the data points far from the center with the Hamming window shape. It's not recommended for small datasets or when tapering of data points is not desired. It is implemented in `HammingBootstrap`.
189
+ - Python (3.8 or higher)
190
+ - `pip` (latest version recommended), plus a suitable environment manager (`venv`, `conda`)
188
191
 
189
- #### (viii) Hanning Bootstrap
190
- This method also uses a specific type of window function. It's useful when you want to reduce the influence of the data points far from the center with the Hanning window shape. It's not recommended for small datasets or when tapering of data points is not desired. It is implemented in `HanningBootstrap`.
192
+ You can also consider using ``uv`` to speed up environment setup.
191
193
 
192
- #### (ix) Tukey Bootstrap
193
- Similar to the Bartlett, Blackman, Hamming, and Hanning methods, the Tukey method uses a specific type of window function. It's useful when you want to reduce the influence of the data points far from the center with the Tukey window shape. It's not recommended for small datasets or when tapering of data points is not desired. It is implemented in `TukeyBootstrap`.
194
+ #### Installing from PyPI
194
195
 
195
- ### Residual Bootstrap
196
- Residual Bootstrap is a method designed for time series data where a model is fit to the data, and the residuals (the difference between the observed and predicted data) are bootstrapped. It's particularly useful when a good model fit is available for the data. However, it's not recommended when a model fit is not available or is poor. `tsbootstrap` provides four time series models to fit to the input data -- `AutoReg`, `ARIMA`, `SARIMA`, and `VAR` (for multivariate input time series data). For more details, refer to `time_series_model.py` and `tsfit.py`.
196
+ To install the latest release of `tsbootstrap` directly from PyPI, run:
197
197
 
198
- ### Statistic-Preserving Bootstrap
199
- Statistic-Preserving Bootstrap is a unique method designed to generate bootstrapped time series data while preserving a specific statistic of the original data. This method can be beneficial in scenarios where it's important to maintain the original data's characteristics in the bootstrapped samples. It is implemented in `StatisticPreservingBootstrap`.
198
+ ```sh
199
+ pip install tsbootstrap
200
+ ```
200
201
 
201
- ### Distribution Bootstrap
202
- Distribution Bootstrap generates bootstrapped samples by fitting a distribution to the residuals and then generating new residuals from the fitted distribution. The new residuals are then added to the fitted values to create the bootstrapped samples. This method is based on the assumption that the residuals follow a specific distribution (like Gaussian, Poisson, etc). It's not recommended when the distribution of residuals is unknown or hard to determine. It is implemented in `DistributionBootstrap`.
202
+ To install with all optional dependencies:
203
203
 
204
- ### Markov Bootstrap
205
- Markov Bootstrap is used for bootstrapping time series data where the residuals of the data are presumed to follow a Markov process. This method is especially useful in scenarios where the current residual primarily depends on the previous one, with little to no dependency on residuals from further in the past. Markov Bootstrap technique is designed to preserve this dependency structure in the bootstrapped samples, making it particularly valuable for time series data that exhibits Markov properties. However, it's not advisable when the residuals of the time series data exhibit long-range dependencies, as the Markov assumption of limited dependency may not hold true. It is implemented in `MarkovBootstrap`. See `markov_sampler.py` for implementation details.
204
+ ```
205
+ pip install "tsbootstrap[all_extras]"
206
+ ```
207
+ ---
206
208
 
207
- ### Sieve Bootstrap
208
- Sieve Bootstrap is designed for handling dependent data, where the residuals of the time series data follow an autoregressive process. This method aims to preserve and simulate the dependencies inherent in the original data within the bootstrapped samples. It operates by approximating the autoregressive process of the residuals using a finite-order autoregressive model. The order of the model is determined based on the data, and the residuals are then bootstrapped. The Sieve Bootstrap technique is particularly valuable for time series data that exhibits autoregressive properties. However, it's not advisable when the residuals of the time series data do not follow an autoregressive process. It is implemented in `SieveBootstrap`. See `time_series_simulator.py` for implementation details.
209
+ Bootstrap algorithms manage their own dependencies - if an extra is needed but not
210
+ present, the object will raise an error at construction.
209
211
 
210
212
  ## 🧩 Modules
211
213
  The `tsbootstrap` package contains various modules that handle tasks such as bootstrapping, time series simulation, and utility functions. This modular approach ensures flexibility, extensibility, and ease of maintenance.
@@ -219,7 +221,7 @@ The `tsbootstrap` package contains various modules that handle tasks such as boo
219
221
  | [commitlint.config.js](https://github.com/astrogilda/tsbootstrap/blob/main/commitlint.config.js) | Configuration for enforcing conventional commit messages. |
220
222
  | [CITATION.cff](https://github.com/astrogilda/tsbootstrap/blob/main/CITATION.cff) | Citation metadata for the project. |
221
223
  | [CODE_OF_CONDUCT.md](https://github.com/astrogilda/tsbootstrap/blob/main/CODE_OF_CONDUCT.md) | Guidelines for community conduct and interactions. |
222
- | [CONTRIBUTING.md](https://github.com/astrogilda/tsbootstrap/blob/main/CITATION.md) | Instructions for contributing to the project. |
224
+ | [CONTRIBUTING.md](https://github.com/astrogilda/tsbootstrap/blob/main/CONTRIBUTING.md) | Instructions for contributing to the project. |
223
225
  | [.codeclimate.yml](https://github.com/astrogilda/tsbootstrap/blob/main/.codeclimate.yml) | Configuration for Code Climate quality checks. |
224
226
  | [.gitignore](https://github.com/astrogilda/tsbootstrap/blob/main/.gitignore) | Specifies files and folders to be ignored by Git. |
225
227
  | [.pre-commit-config.yaml](https://github.com/astrogilda/tsbootstrap/blob/main/.pre-commit-config.yaml) | Configuration for pre-commit hooks. |
@@ -261,108 +263,62 @@ The `tsbootstrap` package contains various modules that handle tasks such as boo
261
263
 
262
264
  </details>
263
265
 
264
- ---
265
266
 
266
- ## 🚀 Getting Started
267
+ ## 🗺 Roadmap
267
268
 
268
- ### ✔️ Prerequisites
269
+ This is an abridged version; for the complete and evolving list of plans and improvements, see [Issue #144](https://github.com/astrogilda/tsbootstrap/issues/144).
269
270
 
270
- Before you begin, ensure that you have the following prerequisites installed:
271
- > - `ℹ️ Requirement 1`
272
- > - `ℹ️ Requirement 2`
273
- > - `ℹ️ ...`
271
+ - **Performance and Scaling**: handling large datasets, distributed backend integration (`Dask`, `Spark`, `Ray`), profiling/optimization
272
+ - **Tuning and AutoML**: adaptive block length, adaptive resampling, evaluation-based parameter selection
273
+ - **Real-time and Stream Data**: stream bootstraps, data update interface
274
+ - **Stage 2 `sktime` Integration**: evaluation module, datasets, benchmarks, sktime forecasters in bootstraps
275
+ - **API and Capability Extension**: panel/hierarchical data, exogenous data, update/stream, model state management
276
+ - **Scope Extension (TBD)**: time series augmentation, fully probabilistic models
274
277
 
275
- ### 📦 Installation and Setup
278
+ ## 🤝 Contributing
276
279
 
277
- This project comes with a `setup.sh` script to ease the setup process. The script will create a new Python virtual environment, install the necessary dependencies, and handle some version-specific installations.
280
+ Contributions are always welcome!
278
281
 
279
- Here are the steps to follow:
282
+ See our [good first issues](https://github.com/astrogilda/tsbootstrap/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)
283
+ for getting started.
280
284
 
281
- 1. Ensure that you have Python, Poetry, and Bash installed on your system. If not, you can install them using the links below:
282
- - [Python](https://www.python.org/downloads/)
283
- - [Poetry](https://python-poetry.org/docs/#installation)
284
- - [Bash](https://www.gnu.org/software/bash/)
285
+ Below is a quick start guide to contributing.
285
286
 
286
- 2. Clone the tsbootstrap repository:
287
- ```sh
288
- git clone https://github.com/astrogilda/tsbootstrap
289
- ```
287
+ ### Developer setup
290
288
 
291
- 3. Change to the project directory:
292
- ```sh
293
- cd tsbootstrap
294
- ```
289
+ 1. Fork the tsbootstrap repository
295
290
 
296
- 4. Make the `setup.sh` script executable:
291
+ 2. Clone the fork to local:
297
292
  ```sh
298
- chmod +x setup.sh
293
+ git clone https://github.com/<your-username>/tsbootstrap
299
294
  ```
300
295
 
301
- 5. Run the `setup.sh` script:
302
- ```sh
303
- ./setup.sh
304
- ```
296
+ 3. In the local repository root, set up a python environment, e.g., `venv` or `conda`.
305
297
 
306
- The `setup.sh` script sets up a Python environment using Poetry, locks and installs the necessary dependencies, and installs `dtaidistance` if the Python version is 3.9 or lower.
307
298
 
308
- 6. Activate the python shell:
309
- ```sh
310
- poetry shell
299
+ 4. Editable install via `pip`, including developer dependencies:
300
+ ```
301
+ pip install -e ".[dev]"
311
302
  ```
312
303
 
313
- That's it! You are now set up and ready to go.
314
-
315
- ### 🎮 Using tsbootstrap
316
-
317
- Here's a basic example using the Moving Block Bootstrap method:
318
-
319
- ```python
320
- from tsbootstrap import MovingBlockBootstrap, MovingBlockBootstrapConfig
321
- import numpy as np
322
-
323
- np.random.seed(0)
324
-
325
- # Create custom time series data
326
-
327
- n_samples = 1000
328
-
329
- y = np.random.normal(0, 1, n_samples).cumsum()
330
-
331
- x1 = np.arange(1, n_samples + 1).reshape(-1, 1)
332
- x2 = np.random.normal(0, 1, (n_samples, 1))
333
- exog = np.concatenate([x1, x2], axis=1)
304
+ The editable install ensures that changes to the package are reflected in
305
+ your environment.
334
306
 
335
- # Instantiate the bootstrap object
336
- mbb_config = MovingBlockBootstrapConfig(
337
- n_bootstraps=1000, rng=42, block_length=10
338
- )
339
- mbb = MovingBlockBootstrap(config=mbb_config)
307
+ ### Verifying the Installation
340
308
 
341
- # Generate the generator for 1000 bootstrapped samples
342
- bootstrapped_samples = bootstrap.bootstrap(n=1000)
309
+ After installation, you can verify that tsbootstrap has been installed correctly by checking its version or by trying to import it in Python:
343
310
  ```
344
-
345
- ### 🧪 Running Tests
346
- ```sh
347
- pytest tests/
311
+ python -c "import tsbootstrap; print(tsbootstrap.__version__)"
348
312
  ```
349
313
 
350
- ---
351
-
352
-
353
- ## 🗺 Roadmap
354
-
355
- > - [ ] `ℹ️ Task 1: in distributionbootstrap, allow mixture of distributions`
356
- > - [ ] `ℹ️ Task 2: allow fractional block_length`
357
- > - [ ] `ℹ️ Task 3: enable multi-processing`
358
- > - [ ] `ℹ️ Task 4: test -- for biascorrectblockbootstrap, see if the statistic on the bootstrapped sample is close to the statistic on the original sample`
314
+ This command should output the version number of tsbootstrap without any errors, indicating that the installation was successful.
359
315
 
316
+ That's it! You are now set up and ready to go. You can start using tsbootstrap for your time series bootstrapping needs.
360
317
 
361
- ## 🤝 Contributing
318
+ ### Contribution workflow
362
319
 
363
320
  Contributions are always welcome! Please follow these steps:
364
- 1. Fork the project repository. This creates a copy of the project on your account that you can modify without affecting the original project.
365
- 2. Clone the forked repository to your local machine using a Git client like Git or GitHub Desktop.
321
+
366
322
  3. Create a new branch with a descriptive name (e.g., `new-feature-branch` or `bugfix-issue-123`).
367
323
  ```sh
368
324
  git checkout -b new-feature-branch
@@ -379,6 +335,25 @@ git push origin new-feature-branch
379
335
  7. Create a new pull request to the original project repository. In the pull request, describe the changes you've made and why they're necessary.
380
336
  The project maintainers will review your changes and provide feedback or merge them into the main branch.
381
337
 
338
+ ### 🧪 Running Tests
339
+
340
+ To run all tests, in your developer environment, run:
341
+
342
+ ```sh
343
+ pytest tests/
344
+ ```
345
+
346
+ Individual bootstrap algorithms can be tested as follows:
347
+
348
+ ```python
349
+ from tsbootstrap.utils import check_estimator
350
+
351
+ check_estimator(my_bootstrap_algo)
352
+ ```
353
+
354
+ ### Contribution guide
355
+
356
+ For more detailed information on how to contribute, please refer to our [CONTRIBUTING.md](https://github.com/astrogilda/tsbootstrap/blob/main/CONTRIBUTING.md) guide.
382
357
  ---
383
358
 
384
359
  ## 📄 License
@@ -400,3 +375,67 @@ Thanks goes to these wonderful people:
400
375
  <!-- ALL-CONTRIBUTORS-LIST:END -->
401
376
 
402
377
  This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!
378
+
379
+
380
+ ---
381
+
382
+
383
+ ## 📍 Time Series Bootstrapping
384
+ `tsbootstrap` is a comprehensive project designed to implement an array of bootstrapping techniques specifically tailored for time series data. This project is targeted towards data scientists, statisticians, economists, and other professionals or researchers who regularly work with time series data and require robust methods for generating bootstrapped copies of univariate and multivariate time series data.
385
+
386
+ ### Overview
387
+ Time series bootstrapping is a nuanced resampling method that is applied to time-dependent data. Traditional bootstrapping methods often assume independence between data points, which is an assumption that does not hold true for time series data where a data point is often dependent on previous data points. Time series bootstrapping techniques respect the chronological order and correlations of the data, providing more accurate estimates of uncertainty or variability.
388
+
389
+ ### Bootstrapping Methodology
390
+ The `tsbootstrap` project offers a diverse set of bootstrapping techniques that can be applied to either the entire input time series (classes prefixed with `Whole`), or after partitioning the data into blocks (classes prefixed with `Block`). These methodologies can be applied directly to the raw input data or to the residuals obtained after fitting one of the five statistical models defined in `time_series_model.py` (classes with `Residual` in their names).
391
+
392
+ ### Block Bootstrap
393
+ Block Bootstrap is a prevalent approach in time series bootstrapping. It involves resampling blocks of consecutive data points, thus respecting the internal structures of the data. There are several techniques under Block Bootstrap, each with its unique approach. `tsbootstrap` provides highly flexible block bootstrapping, allowing the user to specify the block length sampling, block generation, and block resampling strategies. For additional details, refer to `block_length_sampler.py`, `block_generator.py`, and `block_resampler.py`.
394
+
395
+ The Moving Block Bootstrap, Circular Block Bootstrap, Stationary Block Bootstrap, and NonOverlapping Block Bootstrap methods are all variations of the Block Bootstrap that use different methods to sample the data, maintaining various types of dependencies.
396
+
397
+ Bartlett's, Blackman's, Hamming's, Hanning's, and Tukey's Bootstrap methods are specific implementations of the Block Bootstrap that use different window shapes to taper the data, reducing the influence of data points far from the center. In `tsbootstrap`, these methods inherit from `MovingBlockBootstrap`, but can easily be modified to inherit from any of the other three base block bootstrapping classes.
398
+
399
+ Each method comes with its distinct strengths and weaknesses. The choice of method should be based on the characteristics of the data and the specific requirements of the analysis.
400
+
401
+ #### (i) Moving Block Bootstrap
402
+ This method is implemented in `MovingBlockBootstrap` and is used for time series data where blocks of data are resampled to maintain the dependency structure within the blocks. It's useful when the data has dependencies that need to be preserved. It's not recommended when the data does not have any significant dependencies.
403
+
404
+ #### (ii) Circular Block Bootstrap
405
+ This method is implemented in `CircularBlockBootstrap` and treats the data as if it is circular (the end of the data is next to the beginning of the data). It's useful when the data is cyclical or seasonal in nature. It's not recommended when the data does not have a cyclical or seasonal component.
406
+
407
+ #### (iii) Stationary Block Bootstrap
408
+ This method is implemented in `StationaryBlockBootstrap` and randomly resamples blocks of data with block lengths that follow a geometric distribution. It's useful for time series data where the degree of dependency needs to be preserved, and it doesn't require strict stationarity of the underlying process. It's not recommended when the data has strong seasonality or trend components which violate the weak dependence assumption.
409
+
410
+ #### (iv) NonOverlapping Block Bootstrap
411
+ This method is implemented in `NonOverlappingBlockBootstrap` and resamples blocks of data without overlap. It's useful when the data has dependencies that need to be preserved and when overfitting is a concern. It's not recommended when the data does not have any significant dependencies or when the introduction of bias due to non-overlapping selection is a concern.
412
+
413
+ #### (v) Bartlett's Bootstrap
414
+ Bartlett's method is a time series bootstrap method that uses a window or filter that tapers off as you move away from the center of the window. It's useful when you have a large amount of data and you want to reduce the influence of the data points far away from the center. This method is not advised when the tapering of data points is not desired or when the dataset is small as the tapered data points might contain valuable information. It is implemented in `BartlettsBootstrap`.
415
+
416
+ #### (vi) Blackman Bootstrap
417
+ Similar to Bartlett's method, Blackman's method uses a window that tapers off as you move away from the center of the window. The key difference is the shape of the window (the Blackman window has a different shape than the Bartlett window). It's useful when you want to reduce the influence of the data points far from the center with a different window shape. It's not recommended when the dataset is small or tapering of data points is not desired. It is implemented in `BlackmanBootstrap`.
418
+
419
+ #### (vii) Hamming Bootstrap
420
+ Similar to the Bartlett and Blackman methods, the Hamming method uses a specific type of window function. It's useful when you want to reduce the influence of the data points far from the center with the Hamming window shape. It's not recommended for small datasets or when tapering of data points is not desired. It is implemented in `HammingBootstrap`.
421
+
422
+ #### (viii) Hanning Bootstrap
423
+ This method also uses a specific type of window function. It's useful when you want to reduce the influence of the data points far from the center with the Hanning window shape. It's not recommended for small datasets or when tapering of data points is not desired. It is implemented in `HanningBootstrap`.
424
+
425
+ #### (ix) Tukey Bootstrap
426
+ Similar to the Bartlett, Blackman, Hamming, and Hanning methods, the Tukey method uses a specific type of window function. It's useful when you want to reduce the influence of the data points far from the center with the Tukey window shape. It's not recommended for small datasets or when tapering of data points is not desired. It is implemented in `TukeyBootstrap`.
427
+
428
+ ### Residual Bootstrap
429
+ Residual Bootstrap is a method designed for time series data where a model is fit to the data, and the residuals (the difference between the observed and predicted data) are bootstrapped. It's particularly useful when a good model fit is available for the data. However, it's not recommended when a model fit is not available or is poor. `tsbootstrap` provides five time series models to fit to the input data -- `AutoReg`, `ARIMA`, `SARIMA`, `VAR` (for multivariate input time series data), and `ARCH`. For more details, refer to `time_series_model.py` and `tsfit.py`.
430
+
431
+ ### Statistic-Preserving Bootstrap
432
+ Statistic-Preserving Bootstrap is a unique method designed to generate bootstrapped time series data while preserving a specific statistic of the original data. This method can be beneficial in scenarios where it's important to maintain the original data's characteristics in the bootstrapped samples. It is implemented in `StatisticPreservingBootstrap`.
433
+
434
+ ### Distribution Bootstrap
435
+ Distribution Bootstrap generates bootstrapped samples by fitting a distribution to the residuals and then generating new residuals from the fitted distribution. The new residuals are then added to the fitted values to create the bootstrapped samples. This method is based on the assumption that the residuals follow a specific distribution (like Gaussian, Poisson, etc). It's not recommended when the distribution of residuals is unknown or hard to determine. It is implemented in `DistributionBootstrap`.
436
+
437
+ ### Markov Bootstrap
438
+ Markov Bootstrap is used for bootstrapping time series data where the residuals of the data are presumed to follow a Markov process. This method is especially useful in scenarios where the current residual primarily depends on the previous one, with little to no dependency on residuals from further in the past. The Markov Bootstrap technique is designed to preserve this dependency structure in the bootstrapped samples, making it particularly valuable for time series data that exhibits Markov properties. However, it's not advisable when the residuals of the time series data exhibit long-range dependencies, as the Markov assumption of limited dependency may not hold true. It is implemented in `MarkovBootstrap`. See `markov_sampler.py` for implementation details.
439
+
440
+ ### Sieve Bootstrap
441
+ Sieve Bootstrap is designed for handling dependent data, where the residuals of the time series data follow an autoregressive process. This method aims to preserve and simulate the dependencies inherent in the original data within the bootstrapped samples. It operates by approximating the autoregressive process of the residuals using a finite order autoregressive model. The order of the model is determined based on the data, and the residuals are then bootstrapped. The Sieve Bootstrap technique is particularly valuable for time series data that exhibits autoregressive properties. However, it's not advisable when the residuals of the time series data do not follow an autoregressive process. It is implemented in `SieveBootstrap`. See `time_series_simulator.py` for implementation details.