wavetrainer 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wavetrainer-0.0.1/LICENSE +21 -0
- wavetrainer-0.0.1/PKG-INFO +146 -0
- wavetrainer-0.0.1/README.md +121 -0
- wavetrainer-0.0.1/setup.cfg +4 -0
- wavetrainer-0.0.1/setup.py +42 -0
- wavetrainer-0.0.1/tests/__init__.py +0 -0
- wavetrainer-0.0.1/tests/trainer_test.py +38 -0
- wavetrainer-0.0.1/wavetrainer/__init__.py +10 -0
- wavetrainer-0.0.1/wavetrainer/calibrator/__init__.py +1 -0
- wavetrainer-0.0.1/wavetrainer/calibrator/calibrator.py +17 -0
- wavetrainer-0.0.1/wavetrainer/calibrator/calibrator_router.py +83 -0
- wavetrainer-0.0.1/wavetrainer/calibrator/mapie_calibrator.py +60 -0
- wavetrainer-0.0.1/wavetrainer/calibrator/vennabers_calibrator.py +59 -0
- wavetrainer-0.0.1/wavetrainer/create.py +22 -0
- wavetrainer-0.0.1/wavetrainer/exceptions.py +5 -0
- wavetrainer-0.0.1/wavetrainer/fit.py +30 -0
- wavetrainer-0.0.1/wavetrainer/load.py +8 -0
- wavetrainer-0.0.1/wavetrainer/model/__init__.py +1 -0
- wavetrainer-0.0.1/wavetrainer/model/catboost_model.py +80 -0
- wavetrainer-0.0.1/wavetrainer/model/model.py +23 -0
- wavetrainer-0.0.1/wavetrainer/model/model_router.py +83 -0
- wavetrainer-0.0.1/wavetrainer/model_type.py +28 -0
- wavetrainer-0.0.1/wavetrainer/params.py +19 -0
- wavetrainer-0.0.1/wavetrainer/reducer/__init__.py +1 -0
- wavetrainer-0.0.1/wavetrainer/reducer/base_selector_reducer.py +51 -0
- wavetrainer-0.0.1/wavetrainer/reducer/combined_reducer.py +76 -0
- wavetrainer-0.0.1/wavetrainer/reducer/constant_reducer.py +20 -0
- wavetrainer-0.0.1/wavetrainer/reducer/correlation_reducer.py +21 -0
- wavetrainer-0.0.1/wavetrainer/reducer/duplicate_reducer.py +20 -0
- wavetrainer-0.0.1/wavetrainer/reducer/reducer.py +13 -0
- wavetrainer-0.0.1/wavetrainer/selector/__init__.py +1 -0
- wavetrainer-0.0.1/wavetrainer/selector/selector.py +69 -0
- wavetrainer-0.0.1/wavetrainer/trainer.py +401 -0
- wavetrainer-0.0.1/wavetrainer/weights/__init__.py +0 -0
- wavetrainer-0.0.1/wavetrainer/weights/class_weights.py +74 -0
- wavetrainer-0.0.1/wavetrainer/weights/combined_weights.py +56 -0
- wavetrainer-0.0.1/wavetrainer/weights/exponential_weights.py +42 -0
- wavetrainer-0.0.1/wavetrainer/weights/linear_weights.py +42 -0
- wavetrainer-0.0.1/wavetrainer/weights/noop_weights.py +44 -0
- wavetrainer-0.0.1/wavetrainer/weights/sigmoid_weights.py +45 -0
- wavetrainer-0.0.1/wavetrainer/weights/weights.py +15 -0
- wavetrainer-0.0.1/wavetrainer/weights/weights_router.py +81 -0
- wavetrainer-0.0.1/wavetrainer/windower/__init__.py +1 -0
- wavetrainer-0.0.1/wavetrainer/windower/windower.py +70 -0
- wavetrainer-0.0.1/wavetrainer.egg-info/PKG-INFO +146 -0
- wavetrainer-0.0.1/wavetrainer.egg-info/SOURCES.txt +48 -0
- wavetrainer-0.0.1/wavetrainer.egg-info/dependency_links.txt +1 -0
- wavetrainer-0.0.1/wavetrainer.egg-info/not-zip-safe +1 -0
- wavetrainer-0.0.1/wavetrainer.egg-info/requires.txt +11 -0
- wavetrainer-0.0.1/wavetrainer.egg-info/top_level.txt +2 -0
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Will Sackfield
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1,146 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: wavetrainer
|
3
|
+
Version: 0.0.1
|
4
|
+
Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
|
5
|
+
Home-page: https://github.com/8W9aG/wavetrain
|
6
|
+
Author: Will Sackfield
|
7
|
+
Author-email: will.sackfield@gmail.com
|
8
|
+
License: MIT
|
9
|
+
Keywords: machine-learning,ML,hyperparameter,features
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
12
|
+
Description-Content-Type: text/markdown
|
13
|
+
License-File: LICENSE
|
14
|
+
Requires-Dist: pandas>=2.2.3
|
15
|
+
Requires-Dist: optuna>=4.2.1
|
16
|
+
Requires-Dist: scikit-learn>=1.6.1
|
17
|
+
Requires-Dist: feature-engine>=1.8.3
|
18
|
+
Requires-Dist: tqdm>=4.67.1
|
19
|
+
Requires-Dist: numpy>=2.2.3
|
20
|
+
Requires-Dist: scipy>=1.15.2
|
21
|
+
Requires-Dist: catboost>=1.2.7
|
22
|
+
Requires-Dist: venn-abers>=1.4.6
|
23
|
+
Requires-Dist: mapie>=0.9.2
|
24
|
+
Requires-Dist: shapiq>=1.2.2
|
25
|
+
|
26
|
+
# wavetrainer
|
27
|
+
|
28
|
+
<a href="https://pypi.org/project/wavetrainer/">
|
29
|
+
<img alt="PyPi" src="https://img.shields.io/pypi/v/wavetrainer">
|
30
|
+
</a>
|
31
|
+
|
32
|
+
A library for automatically finding the optimal model within feature and hyperparameter space on time series models.
|
33
|
+
|
34
|
+
<p align="center">
|
35
|
+
<img src="wavetrain.png" alt="wavetrain" width="200"/>
|
36
|
+
</p>
|
37
|
+
|
38
|
+
## Dependencies :globe_with_meridians:
|
39
|
+
|
40
|
+
Python 3.11.6:
|
41
|
+
|
42
|
+
- [pandas](https://pandas.pydata.org/)
|
43
|
+
- [optuna](https://optuna.readthedocs.io/en/stable/)
|
44
|
+
- [scikit-learn](https://scikit-learn.org/)
|
45
|
+
- [feature-engine](https://feature-engine.trainindata.com/en/latest/)
|
46
|
+
- [tqdm](https://github.com/tqdm/tqdm)
|
47
|
+
- [numpy](https://numpy.org/)
|
48
|
+
- [scipy](https://scipy.org/)
|
49
|
+
- [catboost](https://catboost.ai/)
|
50
|
+
- [venn-abers](https://github.com/ip200/venn-abers)
|
51
|
+
- [mapie](https://mapie.readthedocs.io/en/stable/)
|
52
|
+
- [shapiq](https://github.com/mmschlk/shapiq)
|
53
|
+
|
54
|
+
## Raison D'être :thought_balloon:
|
55
|
+
|
56
|
+
`wavetrainer` aims to split out the various aspects of creating a good model into different composable pieces and searches the space of these different pieces to find an optimal model. This came about after doing code like this multiple times on multiple projects. This is specifically geared towards time series models, validating itself through walk-forward analysis.
|
57
|
+
|
58
|
+
## Architecture :triangular_ruler:
|
59
|
+
|
60
|
+
`wavetrainer` is an object orientated library. The entities are organised like so:
|
61
|
+
|
62
|
+
* **Trainer**: A sklearn compatible object that can fit and predict data.
|
63
|
+
* **Reducer**: An object that can reduce the feature space based on heuristics.
|
64
|
+
* **Weights**: An object that adds weights to the features.
|
65
|
+
* **Selector**: An object that can select which features to include from the training set.
|
66
|
+
* **Calibrator**: An object that can calibrate the probabilities produced by the model.
|
67
|
+
* **Model**: An object that represents the underlying model architecture being used.
|
68
|
+
* **Windower**: An object that represents the lookback window of the data.
|
69
|
+
|
70
|
+
## Installation :inbox_tray:
|
71
|
+
|
72
|
+
This is a python package hosted on pypi, so to install simply run the following command:
|
73
|
+
|
74
|
+
`pip install wavetrainer`
|
75
|
+
|
76
|
+
or install using this local repository:
|
77
|
+
|
78
|
+
`python setup.py install --old-and-unmanageable`
|
79
|
+
|
80
|
+
## Usage example :eyes:
|
81
|
+
|
82
|
+
The use of `wavetrainer` is entirely through code due to it being a library. It attempts to hide most of its complexity from the user, so it only has a few functions of relevance in its outward API.
|
83
|
+
|
84
|
+
### Training
|
85
|
+
|
86
|
+
To train a model:
|
87
|
+
|
88
|
+
```python
|
89
|
+
import wavetrainer as wt
|
90
|
+
import pandas as pd
|
91
|
+
import numpy as np
|
92
|
+
import random
|
93
|
+
|
94
|
+
data_size = 10
|
95
|
+
df = pd.DataFrame(
|
96
|
+
np.random.randint(0, 30, size=data_size),
|
97
|
+
columns=["X"],
|
98
|
+
index=pd.date_range("20180101", periods=data_size),
|
99
|
+
)
|
100
|
+
df["Y"] = [random.choice([True, False]) for _ in range(data_size)]
|
101
|
+
|
102
|
+
X = df["X"]
|
103
|
+
Y = df["Y"]
|
104
|
+
|
105
|
+
wavetrain = wt.create("my_wavetrain")
|
106
|
+
wavetrain = wavetrain.fit(X, y=Y)
|
107
|
+
```
|
108
|
+
|
109
|
+
This will save it to the folder `my_wavetrain`.
|
110
|
+
|
111
|
+
### Load
|
112
|
+
|
113
|
+
To load a trainer (as well as its composite states):
|
114
|
+
|
115
|
+
```python
|
116
|
+
import wavetrainer as wt
|
117
|
+
|
118
|
+
wavetrain = wt.load("my_wavetrain")
|
119
|
+
```
|
120
|
+
|
121
|
+
### Predict
|
122
|
+
|
123
|
+
To make a prediction from new data:
|
124
|
+
|
125
|
+
```python
|
126
|
+
import wavetrainer as wt
|
127
|
+
import pandas as pd
|
128
|
+
import numpy as np
|
129
|
+
|
130
|
+
wavetrain = wt.load("my_wavetrain")
|
131
|
+
data_size = 1
|
132
|
+
df = pd.DataFrame(
|
133
|
+
np.random.randint(0, 30, size=data_size),
|
134
|
+
columns=["X"],
|
135
|
+
index=pd.date_range("20180101", periods=data_size),
|
136
|
+
)
|
137
|
+
X = df["X"]
|
138
|
+
|
139
|
+
preds = wavetrain.predict(X)
|
140
|
+
```
|
141
|
+
|
142
|
+
`preds` will now contain both the predictions and the probabilities associated with those predictions.
|
143
|
+
|
144
|
+
## License :memo:
|
145
|
+
|
146
|
+
The project is available under the [MIT License](LICENSE).
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# wavetrainer
|
2
|
+
|
3
|
+
<a href="https://pypi.org/project/wavetrainer/">
|
4
|
+
<img alt="PyPi" src="https://img.shields.io/pypi/v/wavetrainer">
|
5
|
+
</a>
|
6
|
+
|
7
|
+
A library for automatically finding the optimal model within feature and hyperparameter space on time series models.
|
8
|
+
|
9
|
+
<p align="center">
|
10
|
+
<img src="wavetrain.png" alt="wavetrain" width="200"/>
|
11
|
+
</p>
|
12
|
+
|
13
|
+
## Dependencies :globe_with_meridians:
|
14
|
+
|
15
|
+
Python 3.11.6:
|
16
|
+
|
17
|
+
- [pandas](https://pandas.pydata.org/)
|
18
|
+
- [optuna](https://optuna.readthedocs.io/en/stable/)
|
19
|
+
- [scikit-learn](https://scikit-learn.org/)
|
20
|
+
- [feature-engine](https://feature-engine.trainindata.com/en/latest/)
|
21
|
+
- [tqdm](https://github.com/tqdm/tqdm)
|
22
|
+
- [numpy](https://numpy.org/)
|
23
|
+
- [scipy](https://scipy.org/)
|
24
|
+
- [catboost](https://catboost.ai/)
|
25
|
+
- [venn-abers](https://github.com/ip200/venn-abers)
|
26
|
+
- [mapie](https://mapie.readthedocs.io/en/stable/)
|
27
|
+
- [shapiq](https://github.com/mmschlk/shapiq)
|
28
|
+
|
29
|
+
## Raison D'être :thought_balloon:
|
30
|
+
|
31
|
+
`wavetrainer` aims to split out the various aspects of creating a good model into different composable pieces and searches the space of these different pieces to find an optimal model. This came about after doing code like this multiple times on multiple projects. This is specifically geared towards time series models, validating itself through walk-forward analysis.
|
32
|
+
|
33
|
+
## Architecture :triangular_ruler:
|
34
|
+
|
35
|
+
`wavetrainer` is an object orientated library. The entities are organised like so:
|
36
|
+
|
37
|
+
* **Trainer**: A sklearn compatible object that can fit and predict data.
|
38
|
+
* **Reducer**: An object that can reduce the feature space based on heuristics.
|
39
|
+
* **Weights**: An object that adds weights to the features.
|
40
|
+
* **Selector**: An object that can select which features to include from the training set.
|
41
|
+
* **Calibrator**: An object that can calibrate the probabilities produced by the model.
|
42
|
+
* **Model**: An object that represents the underlying model architecture being used.
|
43
|
+
* **Windower**: An object that represents the lookback window of the data.
|
44
|
+
|
45
|
+
## Installation :inbox_tray:
|
46
|
+
|
47
|
+
This is a python package hosted on pypi, so to install simply run the following command:
|
48
|
+
|
49
|
+
`pip install wavetrainer`
|
50
|
+
|
51
|
+
or install using this local repository:
|
52
|
+
|
53
|
+
`python setup.py install --old-and-unmanageable`
|
54
|
+
|
55
|
+
## Usage example :eyes:
|
56
|
+
|
57
|
+
The use of `wavetrainer` is entirely through code due to it being a library. It attempts to hide most of its complexity from the user, so it only has a few functions of relevance in its outward API.
|
58
|
+
|
59
|
+
### Training
|
60
|
+
|
61
|
+
To train a model:
|
62
|
+
|
63
|
+
```python
|
64
|
+
import wavetrainer as wt
|
65
|
+
import pandas as pd
|
66
|
+
import numpy as np
|
67
|
+
import random
|
68
|
+
|
69
|
+
data_size = 10
|
70
|
+
df = pd.DataFrame(
|
71
|
+
np.random.randint(0, 30, size=data_size),
|
72
|
+
columns=["X"],
|
73
|
+
index=pd.date_range("20180101", periods=data_size),
|
74
|
+
)
|
75
|
+
df["Y"] = [random.choice([True, False]) for _ in range(data_size)]
|
76
|
+
|
77
|
+
X = df["X"]
|
78
|
+
Y = df["Y"]
|
79
|
+
|
80
|
+
wavetrain = wt.create("my_wavetrain")
|
81
|
+
wavetrain = wavetrain.fit(X, y=Y)
|
82
|
+
```
|
83
|
+
|
84
|
+
This will save it to the folder `my_wavetrain`.
|
85
|
+
|
86
|
+
### Load
|
87
|
+
|
88
|
+
To load a trainer (as well as its composite states):
|
89
|
+
|
90
|
+
```python
|
91
|
+
import wavetrainer as wt
|
92
|
+
|
93
|
+
wavetrain = wt.load("my_wavetrain")
|
94
|
+
```
|
95
|
+
|
96
|
+
### Predict
|
97
|
+
|
98
|
+
To make a prediction from new data:
|
99
|
+
|
100
|
+
```python
|
101
|
+
import wavetrainer as wt
|
102
|
+
import pandas as pd
|
103
|
+
import numpy as np
|
104
|
+
|
105
|
+
wavetrain = wt.load("my_wavetrain")
|
106
|
+
data_size = 1
|
107
|
+
df = pd.DataFrame(
|
108
|
+
np.random.randint(0, 30, size=data_size),
|
109
|
+
columns=["X"],
|
110
|
+
index=pd.date_range("20180101", periods=data_size),
|
111
|
+
)
|
112
|
+
X = df["X"]
|
113
|
+
|
114
|
+
preds = wavetrain.predict(X)
|
115
|
+
```
|
116
|
+
|
117
|
+
`preds` will now contain both the predictions and the probabilities associated with those predictions.
|
118
|
+
|
119
|
+
## License :memo:
|
120
|
+
|
121
|
+
The project is available under the [MIT License](LICENSE).
|
@@ -0,0 +1,42 @@
|
|
1
|
+
"""Setup wavetrain."""
|
2
|
+
from setuptools import setup, find_packages
|
3
|
+
from pathlib import Path
|
4
|
+
import typing
|
5
|
+
|
6
|
+
# The README that sits next to this file is used verbatim as the long
# description passed to setup().
readme_path = Path(__file__).absolute().parent / 'README.md'
long_description = readme_path.read_text(encoding='utf-8')
|
8
|
+
|
9
|
+
|
10
|
+
def install_requires() -> typing.List[str]:
    """Return the requirement specifiers listed in ``requirements.txt``.

    The file is looked up in the directory that contains this ``setup.py``.
    Every line is stripped of surrounding whitespace (including the trailing
    newline) before it is considered. Blank lines, ``#`` comment lines, local
    path entries (starting with ``.``) and editable installs (starting with
    ``-e``) are skipped.

    Returns:
        The remaining requirement strings, in file order.

    Raises:
        FileNotFoundError: If ``requirements.txt`` does not exist.
    """
    requirements_path = Path(__file__).absolute().parent.joinpath('requirements.txt')
    with open(requirements_path, "r", encoding="utf-8") as requirements_txt_handle:
        # Strip first so the prefix checks below also work on indented lines
        # and the returned specifiers carry no trailing newline.
        stripped_lines = (line.strip() for line in requirements_txt_handle)
        return [
            line
            for line in stripped_lines
            if line and not line.startswith((".", "-e", "#"))
        ]
|
22
|
+
|
23
|
+
|
24
|
+
# Package metadata. The long description is the README text read above and the
# dependency list is produced by install_requires().
setup(
    name="wavetrainer",
    version="0.0.1",
    description="A library for automatically finding the optimal model within feature and hyperparameter space.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/8W9aG/wavetrain",
    author="Will Sackfield",
    author_email="will.sackfield@gmail.com",
    license="MIT",
    keywords="machine-learning, ML, hyperparameter, features",
    classifiers=[
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3",
    ],
    packages=find_packages(),
    install_requires=install_requires(),
    zip_safe=False,
)
|
File without changes
|
@@ -0,0 +1,38 @@
|
|
1
|
+
"""Tests for the trainer class."""
|
2
|
+
import datetime
|
3
|
+
import random
|
4
|
+
import tempfile
|
5
|
+
import unittest
|
6
|
+
|
7
|
+
import pandas as pd
|
8
|
+
|
9
|
+
from wavetrainer.trainer import Trainer
|
10
|
+
|
11
|
+
|
12
|
+
class TestTrainer(unittest.TestCase):
    """Smoke test for the Trainer class."""

    def test_trainer(self):
        """Fit a trainer on 100 daily rows and transform the same frame."""
        # A dedicated seeded generator keeps the synthetic features
        # reproducible between runs without touching the global random state.
        rng = random.Random(0)
        with tempfile.TemporaryDirectory() as tmpdir:
            trainer = Trainer(
                tmpdir,
                walkforward_timedelta=datetime.timedelta(days=1),
                trials=10,
            )
            x_data = list(range(100))
            start = datetime.datetime(2022, 1, 1)
            x_index = [start + datetime.timedelta(days=i) for i in range(len(x_data))]
            df = pd.DataFrame(
                data={
                    "column1": x_data,
                    "column2": [(x * rng.random()) + rng.random() for x in x_data],
                    # random() may return exactly 0.0; 1 - random() lies in
                    # (0, 1], so this division can never raise
                    # ZeroDivisionError.
                    "column3": [
                        (x / (1.0 - rng.random())) - rng.random() for x in x_data
                    ],
                },
                index=x_index,
            )
            # Three targets sharing the feature index: two boolean columns
            # and one float column.
            y = pd.DataFrame(
                data={
                    "y": [x % 2 == 0 for x in x_data],
                    "y2": [(x + 2) % 3 == 0 for x in x_data],
                    "y3": [float(x) + 2.0 for x in x_data],
                },
                index=df.index,
            )
            trainer.fit(df, y=y)
            df = trainer.transform(df)
            print("df:")
            print(df)
|
@@ -0,0 +1 @@
|
|
1
|
+
"""The wavetrain calibrator module."""
|
@@ -0,0 +1,17 @@
|
|
1
|
+
"""The prototype calibrator class."""
|
2
|
+
|
3
|
+
from ..fit import Fit
|
4
|
+
from ..model.model import Model
|
5
|
+
from ..params import Params
|
6
|
+
|
7
|
+
|
8
|
+
class Calibrator(Params, Fit):
    """Base class that concrete calibrators derive from."""

    def __init__(self, model: Model):
        # Keep a reference to the model this calibrator is attached to.
        self._model = model

    @classmethod
    def name(cls) -> str:
        """Return the calibrator's name; subclasses are expected to override."""
        raise NotImplementedError("name not implemented in parent class.")
|
@@ -0,0 +1,83 @@
|
|
1
|
+
"""A calibrator class that routes to other calibrators."""
|
2
|
+
|
3
|
+
import json
|
4
|
+
import os
|
5
|
+
from typing import Self
|
6
|
+
|
7
|
+
import optuna
|
8
|
+
import pandas as pd
|
9
|
+
|
10
|
+
from ..model.model import Model
|
11
|
+
from ..model_type import ModelType, determine_model_type
|
12
|
+
from .calibrator import Calibrator
|
13
|
+
from .mapie_calibrator import MAPIECalibrator
|
14
|
+
from .vennabers_calibrator import VennabersCalibrator
|
15
|
+
|
16
|
+
_CALIBRATOR_ROUTER_FILE = "calibrator_router.json"
|
17
|
+
_CALIBRATOR_KEY = "calibrator"
|
18
|
+
_CALIBRATORS = {
|
19
|
+
VennabersCalibrator.name(): VennabersCalibrator,
|
20
|
+
MAPIECalibrator.name(): MAPIECalibrator,
|
21
|
+
}
|
22
|
+
|
23
|
+
|
24
|
+
class CalibratorRouter(Calibrator):
    """Calibrator that delegates every operation to a concrete calibrator.

    The concrete calibrator is picked in ``fit`` (or restored in ``load``);
    until one of those has run, ``save`` and ``transform`` raise.
    """

    # The calibrator currently delegated to, or None before fit()/load().
    _calibrator: Calibrator | None

    def __init__(self, model: Model):
        super().__init__(model)
        self._calibrator = None

    @classmethod
    def name(cls) -> str:
        """Return the name of this calibrator."""
        return "router"

    def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
        """Do nothing: the router itself reads no options from the trial."""
        pass

    def load(self, folder: str) -> None:
        """Restore the calibrator recorded in ``folder``.

        Reads the router JSON file to find which calibrator was saved, builds
        it around the router's model and lets it load its own state.
        """
        router_path = os.path.join(folder, _CALIBRATOR_ROUTER_FILE)
        with open(router_path, encoding="utf8") as handle:
            params = json.load(handle)
        delegate_cls = _CALIBRATORS[params[_CALIBRATOR_KEY]]
        delegate = delegate_cls(self._model)
        delegate.load(folder)
        self._calibrator = delegate

    def save(self, folder: str) -> None:
        """Save the chosen calibrator and record its name in ``folder``.

        Raises:
            ValueError: If no calibrator has been fitted or loaded yet.
        """
        delegate = self._calibrator
        if delegate is None:
            raise ValueError("calibrator is null.")
        delegate.save(folder)
        router_path = os.path.join(folder, _CALIBRATOR_ROUTER_FILE)
        with open(router_path, "w", encoding="utf8") as handle:
            json.dump({_CALIBRATOR_KEY: delegate.name()}, handle)

    def fit(
        self,
        df: pd.DataFrame,
        y: pd.Series | pd.DataFrame | None = None,
        w: pd.Series | None = None,
    ) -> Self:
        """Choose a calibrator for the detected model type and fit it.

        MAPIE is used when the model type is regression, Venn-ABERS otherwise.
        """
        # NOTE(review): the model type is derived from `df`, not from `y` -
        # confirm against determine_model_type that this is intended.
        is_regression = determine_model_type(df) == ModelType.REGRESSION
        delegate_cls = MAPIECalibrator if is_regression else VennabersCalibrator
        delegate: Calibrator = delegate_cls(self._model)
        delegate.fit(df, y=y, w=w)
        # Only remember the calibrator once fitting has succeeded.
        self._calibrator = delegate
        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Transform ``df`` with the chosen calibrator.

        Raises:
            ValueError: If no calibrator has been fitted or loaded yet.
        """
        delegate = self._calibrator
        if delegate is None:
            raise ValueError("calibrator is null.")
        return delegate.transform(df)
|
@@ -0,0 +1,60 @@
|
|
1
|
+
"""A calibrator that implements MAPIE."""
|
2
|
+
|
3
|
+
import os
|
4
|
+
from typing import Self
|
5
|
+
|
6
|
+
import joblib # type: ignore
|
7
|
+
import optuna
|
8
|
+
import pandas as pd
|
9
|
+
from mapie.regression import MapieRegressor # type: ignore
|
10
|
+
|
11
|
+
from ..model.model import PROBABILITY_COLUMN_PREFIX, Model
|
12
|
+
from .calibrator import Calibrator
|
13
|
+
|
14
|
+
_CALIBRATOR_FILENAME = "mapie.joblib"
|
15
|
+
|
16
|
+
|
17
|
+
class MAPIECalibrator(Calibrator):
    """A calibrator that wraps the model's estimator in a MAPIE regressor."""

    def __init__(self, model: Model):
        super().__init__(model)
        self._mapie = MapieRegressor(model.estimator, method="plus")

    @classmethod
    def name(cls) -> str:
        """Return the name of this calibrator."""
        return "mapie"

    def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
        """Do nothing: this calibrator reads no options from the trial."""
        pass

    def load(self, folder: str) -> None:
        """Load the MAPIE regressor from ``folder``."""
        # NOTE: joblib deserialises with pickle, which can execute arbitrary
        # code - only load folders that come from a trusted source.
        self._mapie = joblib.load(os.path.join(folder, _CALIBRATOR_FILENAME))

    def save(self, folder: str) -> None:
        """Dump the MAPIE regressor into ``folder``."""
        joblib.dump(self._mapie, os.path.join(folder, _CALIBRATOR_FILENAME))

    def fit(
        self,
        df: pd.DataFrame,
        y: pd.Series | pd.DataFrame | None = None,
        w: pd.Series | None = None,
    ) -> Self:
        """Fit the MAPIE regressor on ``df`` and ``y``.

        ``w`` is accepted for interface compatibility but is not used.

        Raises:
            ValueError: If the regressor or ``y`` is missing.
        """
        mapie = self._mapie
        if mapie is None:
            raise ValueError("mapie is null")
        if y is None:
            raise ValueError("y is null")
        mapie.fit(df.to_numpy(), y.to_numpy())
        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return the prediction-interval bounds for ``df``.

        The result has the index of ``df`` and one column per (alpha, bound)
        pair, named ``<prefix><alpha>_<is_upper>`` where ``is_upper`` is
        ``False`` for the lower bound and ``True`` for the upper bound.
        """
        alpha = [0.05, 0.32]
        _, y_pis = self._mapie.predict(df, alpha=alpha)
        intervals = pd.DataFrame(data=None, index=df.index)
        # MAPIE returns y_pis with shape (n_samples, 2, n_alpha): axis 1 is
        # the bound (0 = lower, 1 = upper) and axis 2 indexes the alpha
        # levels. The alpha label must therefore come from axis 2 and the
        # lower/upper flag from axis 1.
        for alpha_idx in range(y_pis.shape[2]):
            for bound_idx in range(y_pis.shape[1]):
                column = (
                    f"{PROBABILITY_COLUMN_PREFIX}{alpha[alpha_idx]}_{bound_idx == 1}"
                )
                intervals[column] = y_pis[:, bound_idx, alpha_idx].flatten().tolist()
        return intervals
|
@@ -0,0 +1,59 @@
|
|
1
|
+
"""A calibrator that implements venn abers."""
|
2
|
+
|
3
|
+
import os
|
4
|
+
from typing import Self
|
5
|
+
|
6
|
+
import joblib # type: ignore
|
7
|
+
import optuna
|
8
|
+
import pandas as pd
|
9
|
+
from venn_abers import VennAbers # type: ignore
|
10
|
+
|
11
|
+
from ..model.model import PROBABILITY_COLUMN_PREFIX, Model
|
12
|
+
from .calibrator import Calibrator
|
13
|
+
|
14
|
+
_CALIBRATOR_FILENAME = "vennabers.joblib"
|
15
|
+
|
16
|
+
|
17
|
+
class VennabersCalibrator(Calibrator):
    """A calibrator that uses Venn-ABERS on the model's probability columns."""

    def __init__(self, model: Model):
        super().__init__(model)
        self._vennabers = VennAbers()

    @classmethod
    def name(cls) -> str:
        """Return the name of this calibrator."""
        return "vennabers"

    def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
        """Do nothing: this calibrator reads no options from the trial."""
        pass

    def load(self, folder: str) -> None:
        """Load the Venn-ABERS object from ``folder``."""
        # NOTE: joblib deserialises with pickle, which can execute arbitrary
        # code - only load folders that come from a trusted source.
        self._vennabers = joblib.load(os.path.join(folder, _CALIBRATOR_FILENAME))

    def save(self, folder: str) -> None:
        """Dump the Venn-ABERS object into ``folder``."""
        joblib.dump(self._vennabers, os.path.join(folder, _CALIBRATOR_FILENAME))

    @staticmethod
    def _probability_columns(df: pd.DataFrame) -> list:
        """Return the columns of ``df`` whose name starts with the probability prefix."""
        return [x for x in df.columns.values if x.startswith(PROBABILITY_COLUMN_PREFIX)]

    def fit(
        self,
        df: pd.DataFrame,
        y: pd.Series | pd.DataFrame | None = None,
        w: pd.Series | None = None,
    ) -> Self:
        """Fit Venn-ABERS on the probability columns of ``df`` against ``y``.

        ``w`` is accepted for interface compatibility but is not used.

        Raises:
            ValueError: If the Venn-ABERS object or ``y`` is missing.
        """
        vennabers = self._vennabers
        if vennabers is None:
            raise ValueError("vennabers is null")
        if y is None:
            raise ValueError("y is null")
        prob_columns = self._probability_columns(df)
        vennabers.fit(df[prob_columns].to_numpy(), y.to_numpy())
        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Write calibrated probabilities into ``df`` and return it.

        ``df`` is modified in place: column ``<prefix><i>`` is set to the
        i-th column of the first array returned by ``predict_proba``.
        """
        # Feed predict_proba the same columns fit() was trained on. A frame
        # without any prefixed column is passed whole, as before this change.
        prob_columns = self._probability_columns(df)
        inputs = df[prob_columns] if prob_columns else df
        p_prime, _ = self._vennabers.predict_proba(inputs.to_numpy())
        for i in range(p_prime.shape[1]):
            df[f"{PROBABILITY_COLUMN_PREFIX}{i}"] = p_prime[:, i]
        return df
|
@@ -0,0 +1,22 @@
|
|
1
|
+
"""A function for creating a new trainer."""
|
2
|
+
|
3
|
+
import datetime
|
4
|
+
|
5
|
+
from .trainer import Trainer
|
6
|
+
|
7
|
+
|
8
|
+
def create(
    folder: str,
    walkforward_timedelta: datetime.timedelta = datetime.timedelta(days=1.0),
    test_size: float | datetime.timedelta | None = None,
    validation_size: float | datetime.timedelta | None = None,
    dt_column: str | None = None,
) -> Trainer:
    """Build a new Trainer from the given settings.

    ``folder`` and ``walkforward_timedelta`` are handed to ``Trainer``
    positionally; the remaining arguments are forwarded by keyword,
    unchanged.
    """
    keyword_options = {
        "test_size": test_size,
        "validation_size": validation_size,
        "dt_column": dt_column,
    }
    return Trainer(folder, walkforward_timedelta, **keyword_options)
|
@@ -0,0 +1,30 @@
|
|
1
|
+
"""A prototype class implementing a fit method."""
|
2
|
+
|
3
|
+
from typing import Self
|
4
|
+
|
5
|
+
import pandas as pd
|
6
|
+
|
7
|
+
|
8
|
+
class Fit:
|
9
|
+
"""The prototype fit class."""
|
10
|
+
|
11
|
+
def fit(
|
12
|
+
self,
|
13
|
+
df: pd.DataFrame,
|
14
|
+
y: pd.Series | pd.DataFrame | None = None,
|
15
|
+
w: pd.Series | None = None,
|
16
|
+
) -> Self:
|
17
|
+
"""Fit the dataframe."""
|
18
|
+
raise NotImplementedError("fit not implemented in parent class.")
|
19
|
+
|
20
|
+
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
21
|
+
"""Transform the dataframe."""
|
22
|
+
raise NotImplementedError("transform not implemented in parent class.")
|
23
|
+
|
24
|
+
def fit_transform(
|
25
|
+
self,
|
26
|
+
df: pd.DataFrame,
|
27
|
+
y: pd.Series | pd.DataFrame | None = None,
|
28
|
+
) -> pd.DataFrame:
|
29
|
+
"""Fit and then trasnfrom the dataframe."""
|
30
|
+
return self.fit(df, y=y).transform(df)
|
@@ -0,0 +1 @@
|
|
1
|
+
"""The wavetrain model module."""
|