wavetrainer 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. wavetrainer-0.0.1/LICENSE +21 -0
  2. wavetrainer-0.0.1/PKG-INFO +146 -0
  3. wavetrainer-0.0.1/README.md +121 -0
  4. wavetrainer-0.0.1/setup.cfg +4 -0
  5. wavetrainer-0.0.1/setup.py +42 -0
  6. wavetrainer-0.0.1/tests/__init__.py +0 -0
  7. wavetrainer-0.0.1/tests/trainer_test.py +38 -0
  8. wavetrainer-0.0.1/wavetrainer/__init__.py +10 -0
  9. wavetrainer-0.0.1/wavetrainer/calibrator/__init__.py +1 -0
  10. wavetrainer-0.0.1/wavetrainer/calibrator/calibrator.py +17 -0
  11. wavetrainer-0.0.1/wavetrainer/calibrator/calibrator_router.py +83 -0
  12. wavetrainer-0.0.1/wavetrainer/calibrator/mapie_calibrator.py +60 -0
  13. wavetrainer-0.0.1/wavetrainer/calibrator/vennabers_calibrator.py +59 -0
  14. wavetrainer-0.0.1/wavetrainer/create.py +22 -0
  15. wavetrainer-0.0.1/wavetrainer/exceptions.py +5 -0
  16. wavetrainer-0.0.1/wavetrainer/fit.py +30 -0
  17. wavetrainer-0.0.1/wavetrainer/load.py +8 -0
  18. wavetrainer-0.0.1/wavetrainer/model/__init__.py +1 -0
  19. wavetrainer-0.0.1/wavetrainer/model/catboost_model.py +80 -0
  20. wavetrainer-0.0.1/wavetrainer/model/model.py +23 -0
  21. wavetrainer-0.0.1/wavetrainer/model/model_router.py +83 -0
  22. wavetrainer-0.0.1/wavetrainer/model_type.py +28 -0
  23. wavetrainer-0.0.1/wavetrainer/params.py +19 -0
  24. wavetrainer-0.0.1/wavetrainer/reducer/__init__.py +1 -0
  25. wavetrainer-0.0.1/wavetrainer/reducer/base_selector_reducer.py +51 -0
  26. wavetrainer-0.0.1/wavetrainer/reducer/combined_reducer.py +76 -0
  27. wavetrainer-0.0.1/wavetrainer/reducer/constant_reducer.py +20 -0
  28. wavetrainer-0.0.1/wavetrainer/reducer/correlation_reducer.py +21 -0
  29. wavetrainer-0.0.1/wavetrainer/reducer/duplicate_reducer.py +20 -0
  30. wavetrainer-0.0.1/wavetrainer/reducer/reducer.py +13 -0
  31. wavetrainer-0.0.1/wavetrainer/selector/__init__.py +1 -0
  32. wavetrainer-0.0.1/wavetrainer/selector/selector.py +69 -0
  33. wavetrainer-0.0.1/wavetrainer/trainer.py +401 -0
  34. wavetrainer-0.0.1/wavetrainer/weights/__init__.py +0 -0
  35. wavetrainer-0.0.1/wavetrainer/weights/class_weights.py +74 -0
  36. wavetrainer-0.0.1/wavetrainer/weights/combined_weights.py +56 -0
  37. wavetrainer-0.0.1/wavetrainer/weights/exponential_weights.py +42 -0
  38. wavetrainer-0.0.1/wavetrainer/weights/linear_weights.py +42 -0
  39. wavetrainer-0.0.1/wavetrainer/weights/noop_weights.py +44 -0
  40. wavetrainer-0.0.1/wavetrainer/weights/sigmoid_weights.py +45 -0
  41. wavetrainer-0.0.1/wavetrainer/weights/weights.py +15 -0
  42. wavetrainer-0.0.1/wavetrainer/weights/weights_router.py +81 -0
  43. wavetrainer-0.0.1/wavetrainer/windower/__init__.py +1 -0
  44. wavetrainer-0.0.1/wavetrainer/windower/windower.py +70 -0
  45. wavetrainer-0.0.1/wavetrainer.egg-info/PKG-INFO +146 -0
  46. wavetrainer-0.0.1/wavetrainer.egg-info/SOURCES.txt +48 -0
  47. wavetrainer-0.0.1/wavetrainer.egg-info/dependency_links.txt +1 -0
  48. wavetrainer-0.0.1/wavetrainer.egg-info/not-zip-safe +1 -0
  49. wavetrainer-0.0.1/wavetrainer.egg-info/requires.txt +11 -0
  50. wavetrainer-0.0.1/wavetrainer.egg-info/top_level.txt +2 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Will Sackfield
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,146 @@
1
+ Metadata-Version: 2.1
2
+ Name: wavetrainer
3
+ Version: 0.0.1
4
+ Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
+ Home-page: https://github.com/8W9aG/wavetrain
6
+ Author: Will Sackfield
7
+ Author-email: will.sackfield@gmail.com
8
+ License: MIT
9
+ Keywords: machine-learning,ML,hyperparameter,features
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: pandas>=2.2.3
15
+ Requires-Dist: optuna>=4.2.1
16
+ Requires-Dist: scikit-learn>=1.6.1
17
+ Requires-Dist: feature-engine>=1.8.3
18
+ Requires-Dist: tqdm>=4.67.1
19
+ Requires-Dist: numpy>=2.2.3
20
+ Requires-Dist: scipy>=1.15.2
21
+ Requires-Dist: catboost>=1.2.7
22
+ Requires-Dist: venn-abers>=1.4.6
23
+ Requires-Dist: mapie>=0.9.2
24
+ Requires-Dist: shapiq>=1.2.2
25
+
26
+ # wavetrainer
27
+
28
+ <a href="https://pypi.org/project/wavetrainer/">
29
+ <img alt="PyPi" src="https://img.shields.io/pypi/v/wavetrainer">
30
+ </a>
31
+
32
+ A library for automatically finding the optimal model within feature and hyperparameter space on time series models.
33
+
34
+ <p align="center">
35
+ <img src="wavetrain.png" alt="wavetrain" width="200"/>
36
+ </p>
37
+
38
+ ## Dependencies :globe_with_meridians:
39
+
40
+ Python 3.11.6:
41
+
42
+ - [pandas](https://pandas.pydata.org/)
43
+ - [optuna](https://optuna.readthedocs.io/en/stable/)
44
+ - [scikit-learn](https://scikit-learn.org/)
45
+ - [feature-engine](https://feature-engine.trainindata.com/en/latest/)
46
+ - [tqdm](https://github.com/tqdm/tqdm)
47
+ - [numpy](https://numpy.org/)
48
+ - [scipy](https://scipy.org/)
49
+ - [catboost](https://catboost.ai/)
50
+ - [venn-abers](https://github.com/ip200/venn-abers)
51
+ - [mapie](https://mapie.readthedocs.io/en/stable/)
52
+ - [shapiq](https://github.com/mmschlk/shapiq)
53
+
54
+ ## Raison D'être :thought_balloon:
55
+
56
+ `wavetrainer` aims to split out the various aspects of creating a good model into different composable pieces and searches the space of these different pieces to find an optimal model. This came about after doing code like this multiple times on multiple projects. This is specifically geared towards time series models, validating itself through walk-forward analysis.
57
+
58
+ ## Architecture :triangular_ruler:
59
+
60
+ `wavetrainer` is an object orientated library. The entities are organised like so:
61
+
62
+ * **Trainer**: A sklearn compatible object that can fit and predict data.
63
+ * **Reducer**: An object that can reduce the feature space based on heuristics.
64
+ * **Weights**: An object that adds weights to the features.
65
+ * **Selector**: An object that can select which features to include from the training set.
66
+ * **Calibrator**: An object that can calibrate the probabilities produced by the model.
67
+ * **Model**: An object that represents the underlying model architecture being used.
68
+ * **Windower**: An object that represents the lookback window of the data.
69
+
70
+ ## Installation :inbox_tray:
71
+
72
+ This is a python package hosted on pypi, so to install simply run the following command:
73
+
74
+ `pip install wavetrainer`
75
+
76
+ or install using this local repository:
77
+
78
+ `python setup.py install --old-and-unmanageable`
79
+
80
+ ## Usage example :eyes:
81
+
82
+ The use of `wavetrainer` is entirely through code due to it being a library. It attempts to hide most of its complexity from the user, so it only has a few functions of relevance in its outward API.
83
+
84
+ ### Training
85
+
86
+ To train a model:
87
+
88
+ ```python
89
+ import wavetrainer as wt
90
+ import pandas as pd
91
+ import numpy as np
92
+ import random
93
+
94
+ data_size = 10
95
+ df = pd.DataFrame(
96
+ np.random.randint(0, 30, size=data_size),
97
+ columns=["X"],
98
+ index=pd.date_range("20180101", periods=data_size),
99
+ )
100
+ df["Y"] = [random.choice([True, False]) for _ in range(data_size)]
101
+
102
+ X = df["X"]
103
+ Y = df["Y"]
104
+
105
+ wavetrain = wt.create("my_wavetrain")
106
+ wavetrain = wavetrain.fit(X, y=Y)
107
+ ```
108
+
109
+ This will save it to the folder `my_wavetrain`.
110
+
111
+ ### Load
112
+
113
+ To load a trainer (as well as its composite states):
114
+
115
+ ```python
116
+ import wavetrainer as wt
117
+
118
+ wavetrain = wt.load("my_wavetrain")
119
+ ```
120
+
121
+ ### Predict
122
+
123
+ To make a prediction from new data:
124
+
125
+ ```python
126
+ import wavetrainer as wt
127
+ import pandas as pd
128
+ import numpy as np
129
+
130
+ wavetrain = wt.load("my_wavetrain")
131
+ data_size = 1
132
+ df = pd.DataFrame(
133
+ np.random.randint(0, 30, size=data_size),
134
+ columns=["X"],
135
+ index=pd.date_range("20180101", periods=data_size),
136
+ )
137
+ X = df["X"]
138
+
139
+ preds = wavetrain.predict(X)
140
+ ```
141
+
142
+ `preds` will now contain both the predictions and the probabilities associated with those predictions.
143
+
144
+ ## License :memo:
145
+
146
+ The project is available under the [MIT License](LICENSE).
@@ -0,0 +1,121 @@
1
+ # wavetrainer
2
+
3
+ <a href="https://pypi.org/project/wavetrainer/">
4
+ <img alt="PyPi" src="https://img.shields.io/pypi/v/wavetrainer">
5
+ </a>
6
+
7
+ A library for automatically finding the optimal model within feature and hyperparameter space on time series models.
8
+
9
+ <p align="center">
10
+ <img src="wavetrain.png" alt="wavetrain" width="200"/>
11
+ </p>
12
+
13
+ ## Dependencies :globe_with_meridians:
14
+
15
+ Python 3.11.6:
16
+
17
+ - [pandas](https://pandas.pydata.org/)
18
+ - [optuna](https://optuna.readthedocs.io/en/stable/)
19
+ - [scikit-learn](https://scikit-learn.org/)
20
+ - [feature-engine](https://feature-engine.trainindata.com/en/latest/)
21
+ - [tqdm](https://github.com/tqdm/tqdm)
22
+ - [numpy](https://numpy.org/)
23
+ - [scipy](https://scipy.org/)
24
+ - [catboost](https://catboost.ai/)
25
+ - [venn-abers](https://github.com/ip200/venn-abers)
26
+ - [mapie](https://mapie.readthedocs.io/en/stable/)
27
+ - [shapiq](https://github.com/mmschlk/shapiq)
28
+
29
+ ## Raison D'être :thought_balloon:
30
+
31
+ `wavetrainer` aims to split out the various aspects of creating a good model into different composable pieces and searches the space of these different pieces to find an optimal model. This came about after doing code like this multiple times on multiple projects. This is specifically geared towards time series models, validating itself through walk-forward analysis.
32
+
33
+ ## Architecture :triangular_ruler:
34
+
35
+ `wavetrainer` is an object orientated library. The entities are organised like so:
36
+
37
+ * **Trainer**: A sklearn compatible object that can fit and predict data.
38
+ * **Reducer**: An object that can reduce the feature space based on heuristics.
39
+ * **Weights**: An object that adds weights to the features.
40
+ * **Selector**: An object that can select which features to include from the training set.
41
+ * **Calibrator**: An object that can calibrate the probabilities produced by the model.
42
+ * **Model**: An object that represents the underlying model architecture being used.
43
+ * **Windower**: An object that represents the lookback window of the data.
44
+
45
+ ## Installation :inbox_tray:
46
+
47
+ This is a python package hosted on pypi, so to install simply run the following command:
48
+
49
+ `pip install wavetrainer`
50
+
51
+ or install using this local repository:
52
+
53
+ `python setup.py install --old-and-unmanageable`
54
+
55
+ ## Usage example :eyes:
56
+
57
+ The use of `wavetrainer` is entirely through code due to it being a library. It attempts to hide most of its complexity from the user, so it only has a few functions of relevance in its outward API.
58
+
59
+ ### Training
60
+
61
+ To train a model:
62
+
63
+ ```python
64
+ import wavetrainer as wt
65
+ import pandas as pd
66
+ import numpy as np
67
+ import random
68
+
69
+ data_size = 10
70
+ df = pd.DataFrame(
71
+ np.random.randint(0, 30, size=data_size),
72
+ columns=["X"],
73
+ index=pd.date_range("20180101", periods=data_size),
74
+ )
75
+ df["Y"] = [random.choice([True, False]) for _ in range(data_size)]
76
+
77
+ X = df["X"]
78
+ Y = df["Y"]
79
+
80
+ wavetrain = wt.create("my_wavetrain")
81
+ wavetrain = wavetrain.fit(X, y=Y)
82
+ ```
83
+
84
+ This will save it to the folder `my_wavetrain`.
85
+
86
+ ### Load
87
+
88
+ To load a trainer (as well as its composite states):
89
+
90
+ ```python
91
+ import wavetrainer as wt
92
+
93
+ wavetrain = wt.load("my_wavetrain")
94
+ ```
95
+
96
+ ### Predict
97
+
98
+ To make a prediction from new data:
99
+
100
+ ```python
101
+ import wavetrainer as wt
102
+ import pandas as pd
103
+ import numpy as np
104
+
105
+ wavetrain = wt.load("my_wavetrain")
106
+ data_size = 1
107
+ df = pd.DataFrame(
108
+ np.random.randint(0, 30, size=data_size),
109
+ columns=["X"],
110
+ index=pd.date_range("20180101", periods=data_size),
111
+ )
112
+ X = df["X"]
113
+
114
+ preds = wavetrain.predict(X)
115
+ ```
116
+
117
+ `preds` will now contain both the predictions and the probabilities associated with those predictions.
118
+
119
+ ## License :memo:
120
+
121
+ The project is available under the [MIT License](LICENSE).
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,42 @@
1
+ """Setup wavetrain."""
2
+ from setuptools import setup, find_packages
3
+ from pathlib import Path
4
+ import typing
5
+
6
+ readme_path = Path(__file__).absolute().parent.joinpath('README.md')
7
+ long_description = readme_path.read_text(encoding='utf-8')
8
+
9
+
10
+ def install_requires() -> typing.List[str]:
11
+ """Find the install requires strings from requirements.txt"""
12
+ requires = []
13
+ with open(
14
+ Path(__file__).absolute().parent.joinpath('requirements.txt'), "r"
15
+ ) as requirments_txt_handle:
16
+ requires = [
17
+ x
18
+ for x in requirments_txt_handle
19
+ if not x.startswith(".") and not x.startswith("-e")
20
+ ]
21
+ return requires
22
+
23
+
24
+ setup(
25
+ name='wavetrainer',
26
+ version='0.0.1',
27
+ description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
+ long_description=long_description,
29
+ long_description_content_type='text/markdown',
30
+ classifiers=[
31
+ 'License :: OSI Approved :: MIT License',
32
+ 'Programming Language :: Python :: 3',
33
+ ],
34
+ keywords='machine-learning, ML, hyperparameter, features',
35
+ url='https://github.com/8W9aG/wavetrain',
36
+ author='Will Sackfield',
37
+ author_email='will.sackfield@gmail.com',
38
+ license='MIT',
39
+ install_requires=install_requires(),
40
+ zip_safe=False,
41
+ packages=find_packages()
42
+ )
File without changes
@@ -0,0 +1,38 @@
1
+ """Tests for the trainer class."""
2
+ import datetime
3
+ import random
4
+ import tempfile
5
+ import unittest
6
+
7
+ import pandas as pd
8
+
9
+ from wavetrainer.trainer import Trainer
10
+
11
+
12
+ class TestTrainer(unittest.TestCase):
13
+
14
+ def test_trainer(self):
15
+ with tempfile.TemporaryDirectory() as tmpdir:
16
+ trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=1), trials=10)
17
+ x_data = [i for i in range(100)]
18
+ x_index = [datetime.datetime(2022, 1, 1) + datetime.timedelta(days=i) for i in range(len(x_data))]
19
+ df = pd.DataFrame(
20
+ data={
21
+ "column1": x_data,
22
+ "column2": [(x * random.random()) + random.random() for x in x_data],
23
+ "column3": [(x / random.random()) - random.random() for x in x_data],
24
+ },
25
+ index=x_index,
26
+ )
27
+ y = pd.DataFrame(
28
+ data={
29
+ "y": [x % 2 == 0 for x in x_data],
30
+ "y2": [(x + 2) % 3 == 0 for x in x_data],
31
+ "y3": [float(x) + 2.0 for x in x_data],
32
+ },
33
+ index=df.index,
34
+ )
35
+ trainer.fit(df, y=y)
36
+ df = trainer.transform(df)
37
+ print("df:")
38
+ print(df)
@@ -0,0 +1,10 @@
1
+ """The wavetrain main module."""
2
+
3
+ from .create import create
4
+ from .load import load
5
+
6
+ __VERSION__ = "0.0.1"
7
+ __all__ = (
8
+ "create",
9
+ "load",
10
+ )
@@ -0,0 +1 @@
1
+ """The wavetrain calibrator module."""
@@ -0,0 +1,17 @@
1
+ """The prototype calibrator class."""
2
+
3
+ from ..fit import Fit
4
+ from ..model.model import Model
5
+ from ..params import Params
6
+
7
+
8
+ class Calibrator(Params, Fit):
9
+ """The prototype calibrator class."""
10
+
11
+ def __init__(self, model: Model):
12
+ self._model = model
13
+
14
+ @classmethod
15
+ def name(cls) -> str:
16
+ """The name of the calibrator."""
17
+ raise NotImplementedError("name not implemented in parent class.")
@@ -0,0 +1,83 @@
1
+ """A calibrator class that routes to other calibrators."""
2
+
3
+ import json
4
+ import os
5
+ from typing import Self
6
+
7
+ import optuna
8
+ import pandas as pd
9
+
10
+ from ..model.model import Model
11
+ from ..model_type import ModelType, determine_model_type
12
+ from .calibrator import Calibrator
13
+ from .mapie_calibrator import MAPIECalibrator
14
+ from .vennabers_calibrator import VennabersCalibrator
15
+
16
+ _CALIBRATOR_ROUTER_FILE = "calibrator_router.json"
17
+ _CALIBRATOR_KEY = "calibrator"
18
+ _CALIBRATORS = {
19
+ VennabersCalibrator.name(): VennabersCalibrator,
20
+ MAPIECalibrator.name(): MAPIECalibrator,
21
+ }
22
+
23
+
24
+ class CalibratorRouter(Calibrator):
25
+ """A router that routes to a different calibrator class."""
26
+
27
+ _calibrator: Calibrator | None
28
+
29
+ def __init__(self, model: Model):
30
+ super().__init__(model)
31
+ self._calibrator = None
32
+
33
+ @classmethod
34
+ def name(cls) -> str:
35
+ return "router"
36
+
37
+ def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
38
+ pass
39
+
40
+ def load(self, folder: str) -> None:
41
+ with open(
42
+ os.path.join(folder, _CALIBRATOR_ROUTER_FILE), encoding="utf8"
43
+ ) as handle:
44
+ params = json.load(handle)
45
+ calibrator = _CALIBRATORS[params[_CALIBRATOR_KEY]](self._model)
46
+ calibrator.load(folder)
47
+ self._calibrator = calibrator
48
+
49
+ def save(self, folder: str) -> None:
50
+ calibrator = self._calibrator
51
+ if calibrator is None:
52
+ raise ValueError("calibrator is null.")
53
+ calibrator.save(folder)
54
+ with open(
55
+ os.path.join(folder, _CALIBRATOR_ROUTER_FILE), "w", encoding="utf8"
56
+ ) as handle:
57
+ json.dump(
58
+ {
59
+ _CALIBRATOR_KEY: calibrator.name(),
60
+ },
61
+ handle,
62
+ )
63
+
64
+ def fit(
65
+ self,
66
+ df: pd.DataFrame,
67
+ y: pd.Series | pd.DataFrame | None = None,
68
+ w: pd.Series | None = None,
69
+ ) -> Self:
70
+ calibrator: Calibrator | None = None
71
+ if determine_model_type(df) == ModelType.REGRESSION:
72
+ calibrator = MAPIECalibrator(self._model)
73
+ else:
74
+ calibrator = VennabersCalibrator(self._model)
75
+ calibrator.fit(df, y=y, w=w)
76
+ self._calibrator = calibrator
77
+ return self
78
+
79
+ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
80
+ calibrator = self._calibrator
81
+ if calibrator is None:
82
+ raise ValueError("calibrator is null.")
83
+ return calibrator.transform(df)
@@ -0,0 +1,60 @@
1
+ """A calibrator that implements MAPIE."""
2
+
3
+ import os
4
+ from typing import Self
5
+
6
+ import joblib # type: ignore
7
+ import optuna
8
+ import pandas as pd
9
+ from mapie.regression import MapieRegressor # type: ignore
10
+
11
+ from ..model.model import PROBABILITY_COLUMN_PREFIX, Model
12
+ from .calibrator import Calibrator
13
+
14
+ _CALIBRATOR_FILENAME = "mapie.joblib"
15
+
16
+
17
+ class MAPIECalibrator(Calibrator):
18
+ """A class that uses MAPIE as a calibrator."""
19
+
20
+ def __init__(self, model: Model):
21
+ super().__init__(model)
22
+ self._mapie = MapieRegressor(model.estimator, method="plus")
23
+
24
+ @classmethod
25
+ def name(cls) -> str:
26
+ return "mapie"
27
+
28
+ def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
29
+ pass
30
+
31
+ def load(self, folder: str) -> None:
32
+ self._mapie = joblib.load(os.path.join(folder, _CALIBRATOR_FILENAME))
33
+
34
+ def save(self, folder: str) -> None:
35
+ joblib.dump(self._mapie, os.path.join(folder, _CALIBRATOR_FILENAME))
36
+
37
+ def fit(
38
+ self,
39
+ df: pd.DataFrame,
40
+ y: pd.Series | pd.DataFrame | None = None,
41
+ w: pd.Series | None = None,
42
+ ) -> Self:
43
+ mapie = self._mapie
44
+ if mapie is None:
45
+ raise ValueError("mapie is null")
46
+ if y is None:
47
+ raise ValueError("y is null")
48
+ mapie.fit(df.to_numpy(), y.to_numpy())
49
+ return self
50
+
51
+ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
52
+ alpha = [0.05, 0.32]
53
+ _, y_pis = self._mapie.predict(df, alpha=alpha)
54
+ df = pd.DataFrame(data=None, index=df.index)
55
+ for i in range(y_pis.shape[1]):
56
+ for ii in range(y_pis.shape[2]):
57
+ df[f"{PROBABILITY_COLUMN_PREFIX}{alpha[i]}_{ii == 1}"] = (
58
+ y_pis[:, i, ii].flatten().tolist()
59
+ )
60
+ return df
@@ -0,0 +1,59 @@
1
+ """A calibrator that implements venn abers."""
2
+
3
+ import os
4
+ from typing import Self
5
+
6
+ import joblib # type: ignore
7
+ import optuna
8
+ import pandas as pd
9
+ from venn_abers import VennAbers # type: ignore
10
+
11
+ from ..model.model import PROBABILITY_COLUMN_PREFIX, Model
12
+ from .calibrator import Calibrator
13
+
14
+ _CALIBRATOR_FILENAME = "vennabers.joblib"
15
+
16
+
17
+ class VennabersCalibrator(Calibrator):
18
+ """A class that uses venn abers as a calibrator."""
19
+
20
+ def __init__(self, model: Model):
21
+ super().__init__(model)
22
+ self._vennabers = VennAbers()
23
+
24
+ @classmethod
25
+ def name(cls) -> str:
26
+ return "vennabers"
27
+
28
+ def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
29
+ pass
30
+
31
+ def load(self, folder: str) -> None:
32
+ self._vennabers = joblib.load(os.path.join(folder, _CALIBRATOR_FILENAME))
33
+
34
+ def save(self, folder: str) -> None:
35
+ joblib.dump(self._vennabers, os.path.join(folder, _CALIBRATOR_FILENAME))
36
+
37
+ def fit(
38
+ self,
39
+ df: pd.DataFrame,
40
+ y: pd.Series | pd.DataFrame | None = None,
41
+ w: pd.Series | None = None,
42
+ ) -> Self:
43
+ vennabers = self._vennabers
44
+ if vennabers is None:
45
+ raise ValueError("vennabers is null")
46
+ if y is None:
47
+ raise ValueError("y is null")
48
+ prob_columns = [
49
+ x for x in df.columns.values if x.startswith(PROBABILITY_COLUMN_PREFIX)
50
+ ]
51
+ vennabers.fit(df[prob_columns].to_numpy(), y.to_numpy())
52
+ return self
53
+
54
+ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
55
+ p_prime, _ = self._vennabers.predict_proba(df.to_numpy())
56
+ for i in range(p_prime.shape[1]):
57
+ prob = p_prime[:, i]
58
+ df[f"{PROBABILITY_COLUMN_PREFIX}{i}"] = prob
59
+ return df
@@ -0,0 +1,22 @@
1
+ """A function for creating a new trainer."""
2
+
3
+ import datetime
4
+
5
+ from .trainer import Trainer
6
+
7
+
8
+ def create(
9
+ folder: str,
10
+ walkforward_timedelta: datetime.timedelta = datetime.timedelta(days=1.0),
11
+ test_size: float | datetime.timedelta | None = None,
12
+ validation_size: float | datetime.timedelta | None = None,
13
+ dt_column: str | None = None,
14
+ ) -> Trainer:
15
+ """Create a trainer."""
16
+ return Trainer(
17
+ folder,
18
+ walkforward_timedelta,
19
+ test_size=test_size,
20
+ validation_size=validation_size,
21
+ dt_column=dt_column,
22
+ )
@@ -0,0 +1,5 @@
1
+ """Custom exceptions within wavetrain."""
2
+
3
+
4
+ class WavetrainException(Exception):
5
+ """An exception that denotes that wavetrain should end the training early."""
@@ -0,0 +1,30 @@
1
+ """A prototype class implementing a fit method."""
2
+
3
+ from typing import Self
4
+
5
+ import pandas as pd
6
+
7
+
8
+ class Fit:
9
+ """The prototype fit class."""
10
+
11
+ def fit(
12
+ self,
13
+ df: pd.DataFrame,
14
+ y: pd.Series | pd.DataFrame | None = None,
15
+ w: pd.Series | None = None,
16
+ ) -> Self:
17
+ """Fit the dataframe."""
18
+ raise NotImplementedError("fit not implemented in parent class.")
19
+
20
+ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
21
+ """Transform the dataframe."""
22
+ raise NotImplementedError("transform not implemented in parent class.")
23
+
24
+ def fit_transform(
25
+ self,
26
+ df: pd.DataFrame,
27
+ y: pd.Series | pd.DataFrame | None = None,
28
+ ) -> pd.DataFrame:
29
+ """Fit and then transform the dataframe."""
30
+ return self.fit(df, y=y).transform(df)
@@ -0,0 +1,8 @@
1
+ """The function for loading the trainer state from disk."""
2
+
3
+ from .trainer import Trainer
4
+
5
+
6
+ def load(folder: str) -> Trainer:
7
+ """Loads the trainer from the folder."""
8
+ raise NotImplementedError("load isn't implemented.")
@@ -0,0 +1 @@
1
+ """The wavetrain model module."""