wavetrainer 0.0.4__tar.gz → 0.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58):
  1. {wavetrainer-0.0.4/wavetrainer.egg-info → wavetrainer-0.0.6}/PKG-INFO +1 -3
  2. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/README.md +0 -1
  3. wavetrainer-0.0.4/wavetrainer.egg-info/requires.txt → wavetrainer-0.0.6/requirements.txt +1 -2
  4. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/setup.py +1 -1
  5. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/tests/trainer_test.py +1 -1
  6. wavetrainer-0.0.6/wavetrainer/__init__.py +6 -0
  7. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/calibrator/calibrator_router.py +5 -0
  8. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/calibrator/mapie_calibrator.py +24 -8
  9. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/calibrator/vennabers_calibrator.py +4 -0
  10. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/fit.py +8 -1
  11. wavetrainer-0.0.6/wavetrainer/model/catboost_classifier_wrap.py +15 -0
  12. wavetrainer-0.0.6/wavetrainer/model/catboost_kwargs.py +35 -0
  13. wavetrainer-0.0.6/wavetrainer/model/catboost_model.py +209 -0
  14. wavetrainer-0.0.6/wavetrainer/model/catboost_regressor_wrap.py +13 -0
  15. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/model/model.py +12 -0
  16. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/model/model_router.py +18 -1
  17. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/model_type.py +6 -6
  18. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/reducer/base_selector_reducer.py +4 -0
  19. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/reducer/combined_reducer.py +4 -0
  20. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/reducer/nonnumeric_reducer.py +4 -0
  21. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/selector/selector.py +26 -14
  22. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/trainer.py +18 -8
  23. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/class_weights.py +4 -0
  24. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/combined_weights.py +4 -0
  25. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/exponential_weights.py +4 -0
  26. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/linear_weights.py +3 -1
  27. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/noop_weights.py +3 -1
  28. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/sigmoid_weights.py +3 -1
  29. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/weights_router.py +4 -0
  30. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/windower/windower.py +4 -0
  31. {wavetrainer-0.0.4 → wavetrainer-0.0.6/wavetrainer.egg-info}/PKG-INFO +1 -3
  32. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer.egg-info/SOURCES.txt +3 -1
  33. wavetrainer-0.0.4/requirements.txt → wavetrainer-0.0.6/wavetrainer.egg-info/requires.txt +0 -1
  34. wavetrainer-0.0.4/wavetrainer/__init__.py +0 -10
  35. wavetrainer-0.0.4/wavetrainer/load.py +0 -8
  36. wavetrainer-0.0.4/wavetrainer/model/catboost_model.py +0 -80
  37. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/LICENSE +0 -0
  38. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/MANIFEST.in +0 -0
  39. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/setup.cfg +0 -0
  40. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/tests/__init__.py +0 -0
  41. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/calibrator/__init__.py +0 -0
  42. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/calibrator/calibrator.py +0 -0
  43. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/create.py +0 -0
  44. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/exceptions.py +0 -0
  45. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/model/__init__.py +0 -0
  46. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/params.py +0 -0
  47. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/reducer/__init__.py +0 -0
  48. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/reducer/constant_reducer.py +0 -0
  49. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/reducer/correlation_reducer.py +0 -0
  50. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  51. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/reducer/reducer.py +0 -0
  52. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/selector/__init__.py +0 -0
  53. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/__init__.py +0 -0
  54. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/weights.py +0 -0
  55. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/windower/__init__.py +0 -0
  56. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer.egg-info/dependency_links.txt +0 -0
  57. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer.egg-info/not-zip-safe +0 -0
  58. {wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.4
3
+ Version: 0.0.6
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -21,7 +21,6 @@ Requires-Dist: scipy>=1.15.2
21
21
  Requires-Dist: catboost>=1.2.7
22
22
  Requires-Dist: venn-abers>=1.4.6
23
23
  Requires-Dist: mapie>=0.9.2
24
- Requires-Dist: shapiq>=1.2.2
25
24
 
26
25
  # wavetrainer
27
26
 
@@ -49,7 +48,6 @@ Python 3.11.6:
49
48
  - [catboost](https://catboost.ai/)
50
49
  - [venn-abers](https://github.com/ip200/venn-abers)
51
50
  - [mapie](https://mapie.readthedocs.io/en/stable/)
52
- - [shapiq](https://github.com/mmschlk/shapiq)
53
51
 
54
52
  ## Raison D'être :thought_balloon:
55
53
 
@@ -24,7 +24,6 @@ Python 3.11.6:
24
24
  - [catboost](https://catboost.ai/)
25
25
  - [venn-abers](https://github.com/ip200/venn-abers)
26
26
  - [mapie](https://mapie.readthedocs.io/en/stable/)
27
- - [shapiq](https://github.com/mmschlk/shapiq)
28
27
 
29
28
  ## Raison D'être :thought_balloon:
30
29
 
@@ -7,5 +7,4 @@ numpy>=1.26.4
7
7
  scipy>=1.15.2
8
8
  catboost>=1.2.7
9
9
  venn-abers>=1.4.6
10
- mapie>=0.9.2
11
- shapiq>=1.2.2
10
+ mapie>=0.9.2
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
23
23
 
24
24
  setup(
25
25
  name='wavetrainer',
26
- version='0.0.4',
26
+ version='0.0.6',
27
27
  description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
28
  long_description=long_description,
29
29
  long_description_content_type='text/markdown',
@@ -13,7 +13,7 @@ class TestTrainer(unittest.TestCase):
13
13
 
14
14
  def test_trainer(self):
15
15
  with tempfile.TemporaryDirectory() as tmpdir:
16
- trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=1), trials=10)
16
+ trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=1)
17
17
  x_data = [i for i in range(100)]
18
18
  x_index = [datetime.datetime(2022, 1, 1) + datetime.timedelta(days=i) for i in range(len(x_data))]
19
19
  df = pd.DataFrame(
@@ -0,0 +1,6 @@
1
+ """The wavetrain main module."""
2
+
3
+ from .create import create
4
+
5
+ __VERSION__ = "0.0.6"
6
+ __all__ = ("create",)
@@ -24,6 +24,8 @@ _CALIBRATORS = {
24
24
  class CalibratorRouter(Calibrator):
25
25
  """A router that routes to a different calibrator class."""
26
26
 
27
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
28
+
27
29
  _calibrator: Calibrator | None
28
30
 
29
31
  def __init__(self, model: Model):
@@ -66,7 +68,10 @@ class CalibratorRouter(Calibrator):
66
68
  df: pd.DataFrame,
67
69
  y: pd.Series | pd.DataFrame | None = None,
68
70
  w: pd.Series | None = None,
71
+ eval_x: pd.DataFrame | None = None,
72
+ eval_y: pd.Series | pd.DataFrame | None = None,
69
73
  ) -> Self:
74
+ # pylint: disable=no-else-return
70
75
  calibrator: Calibrator | None = None
71
76
  if determine_model_type(df) == ModelType.REGRESSION:
72
77
  calibrator = MAPIECalibrator(self._model)
@@ -1,11 +1,13 @@
1
1
  """A calibrator that implements MAPIE."""
2
2
 
3
+ import logging
3
4
  import os
4
5
  from typing import Self
5
6
 
6
7
  import joblib # type: ignore
7
8
  import optuna
8
9
  import pandas as pd
10
+ import sklearn # type: ignore
9
11
  from mapie.regression import MapieRegressor # type: ignore
10
12
 
11
13
  from ..model.model import PROBABILITY_COLUMN_PREFIX, Model
@@ -17,6 +19,8 @@ _CALIBRATOR_FILENAME = "mapie.joblib"
17
19
  class MAPIECalibrator(Calibrator):
18
20
  """A class that uses MAPIE as a calibrator."""
19
21
 
22
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
23
+
20
24
  def __init__(self, model: Model):
21
25
  super().__init__(model)
22
26
  self._mapie = MapieRegressor(model.estimator, method="plus")
@@ -39,22 +43,34 @@ class MAPIECalibrator(Calibrator):
39
43
  df: pd.DataFrame,
40
44
  y: pd.Series | pd.DataFrame | None = None,
41
45
  w: pd.Series | None = None,
46
+ eval_x: pd.DataFrame | None = None,
47
+ eval_y: pd.Series | pd.DataFrame | None = None,
42
48
  ) -> Self:
43
49
  mapie = self._mapie
44
50
  if mapie is None:
45
51
  raise ValueError("mapie is null")
46
52
  if y is None:
47
53
  raise ValueError("y is null")
54
+ if len(df) <= 5:
55
+ return self
48
56
  mapie.fit(df.to_numpy(), y.to_numpy())
49
57
  return self
50
58
 
51
59
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
52
- alpha = [0.05, 0.32]
53
- _, y_pis = self._mapie.predict(df, alpha=alpha)
54
- df = pd.DataFrame(data=None, index=df.index)
55
- for i in range(y_pis.shape[1]):
56
- for ii in range(y_pis.shape[2]):
57
- df[f"{PROBABILITY_COLUMN_PREFIX}{alpha[i]}_{ii == 1}"] = (
58
- y_pis[:, i, ii].flatten().tolist()
59
- )
60
+ try:
61
+ alpha = []
62
+ for potential_alpha in [0.05, 0.32]:
63
+ if len(df) > int(1.0 / potential_alpha):
64
+ alpha.append(potential_alpha)
65
+ if alpha:
66
+ _, y_pis = self._mapie.predict(df, alpha=alpha)
67
+ for i in range(y_pis.shape[1]):
68
+ if i >= len(alpha):
69
+ continue
70
+ for ii in range(y_pis.shape[2]):
71
+ alpha_val = alpha[i]
72
+ values = y_pis[:, i, ii].flatten().tolist()
73
+ df[f"{PROBABILITY_COLUMN_PREFIX}{alpha_val}_{ii == 1}"] = values
74
+ except sklearn.exceptions.NotFittedError as exc: # type: ignore
75
+ logging.warning(str(exc))
60
76
  return df
@@ -17,6 +17,8 @@ _CALIBRATOR_FILENAME = "vennabers.joblib"
17
17
  class VennabersCalibrator(Calibrator):
18
18
  """A class that uses venn abers as a calibrator."""
19
19
 
20
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
21
+
20
22
  def __init__(self, model: Model):
21
23
  super().__init__(model)
22
24
  self._vennabers = VennAbers()
@@ -39,6 +41,8 @@ class VennabersCalibrator(Calibrator):
39
41
  df: pd.DataFrame,
40
42
  y: pd.Series | pd.DataFrame | None = None,
41
43
  w: pd.Series | None = None,
44
+ eval_x: pd.DataFrame | None = None,
45
+ eval_y: pd.Series | pd.DataFrame | None = None,
42
46
  ) -> Self:
43
47
  vennabers = self._vennabers
44
48
  if vennabers is None:
@@ -8,11 +8,15 @@ import pandas as pd
8
8
  class Fit:
9
9
  """The prototype fit class."""
10
10
 
11
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
12
+
11
13
  def fit(
12
14
  self,
13
15
  df: pd.DataFrame,
14
16
  y: pd.Series | pd.DataFrame | None = None,
15
17
  w: pd.Series | None = None,
18
+ eval_x: pd.DataFrame | None = None,
19
+ eval_y: pd.Series | pd.DataFrame | None = None,
16
20
  ) -> Self:
17
21
  """Fit the dataframe."""
18
22
  raise NotImplementedError("fit not implemented in parent class.")
@@ -25,6 +29,9 @@ class Fit:
25
29
  self,
26
30
  df: pd.DataFrame,
27
31
  y: pd.Series | pd.DataFrame | None = None,
32
+ w: pd.Series | None = None,
33
+ eval_x: pd.DataFrame | None = None,
34
+ eval_y: pd.Series | pd.DataFrame | None = None,
28
35
  ) -> pd.DataFrame:
29
36
  """Fit and then trasnfrom the dataframe."""
30
- return self.fit(df, y=y).transform(df)
37
+ return self.fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y).transform(df)
@@ -0,0 +1,15 @@
1
+ """A wrapper for catboost classifier to handle some edge cases."""
2
+
3
+ # pylint: disable=duplicate-code
4
+
5
+ from catboost import CatBoostClassifier # type: ignore
6
+
7
+ from .catboost_kwargs import handle_fit_kwargs
8
+
9
+
10
+ class CatBoostClassifierWrapper(CatBoostClassifier):
11
+ """A wrapper for the catboost classifier."""
12
+
13
+ def fit(self, *args, **kwargs):
14
+ kwargs = handle_fit_kwargs(*args, **kwargs)
15
+ return super().fit(*args, **kwargs)
@@ -0,0 +1,35 @@
1
+ """A list of constant catboost kwargs."""
2
+
3
+ from typing import Any
4
+
5
+ import numpy as np
6
+ from catboost import Pool # type: ignore
7
+
8
+ ORIGINAL_X = "original_x"
9
+ EVAL_SET = "eval_set"
10
+
11
+
12
+ def handle_fit_kwargs(*args, **kwargs) -> dict[str, Any]:
13
+ """Handles keyword args coming into a catboost fit method."""
14
+ if ORIGINAL_X in kwargs:
15
+ df = kwargs[ORIGINAL_X]
16
+ eval_x, eval_y = kwargs[EVAL_SET]
17
+ fit_x = args[0]
18
+ fix_x_cp = fit_x.copy()
19
+
20
+ # Stupid code to ensure eval is feature equivalent to train data
21
+ included_columns = []
22
+ for i in range(fix_x_cp.shape[1]):
23
+ arr_col_values = fix_x_cp[:, i]
24
+ for col in df.columns:
25
+ df_col_values = df[col].values
26
+ if np.allclose(df_col_values, arr_col_values, equal_nan=True):
27
+ included_columns.append(col)
28
+ df = df.drop(col, axis=1)
29
+ break
30
+
31
+ eval_x = eval_x[included_columns]
32
+ kwargs[EVAL_SET] = Pool(eval_x, label=eval_y)
33
+
34
+ del kwargs[ORIGINAL_X]
35
+ return kwargs
@@ -0,0 +1,209 @@
1
+ """A model that wraps catboost."""
2
+
3
+ import json
4
+ import os
5
+ from typing import Any, Self
6
+
7
+ import optuna
8
+ import pandas as pd
9
+ from catboost import CatBoost, Pool # type: ignore
10
+
11
+ from ..model_type import ModelType, determine_model_type
12
+ from .catboost_classifier_wrap import CatBoostClassifierWrapper
13
+ from .catboost_kwargs import EVAL_SET, ORIGINAL_X
14
+ from .catboost_regressor_wrap import CatBoostRegressorWrapper
15
+ from .model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
16
+
17
+ _MODEL_FILENAME = "model.cbm"
18
+ _MODEL_PARAMS_FILENAME = "model_params.json"
19
+ _ITERATIONS_KEY = "iterations"
20
+ _LEARNING_RATE_KEY = "learning_rate"
21
+ _DEPTH_KEY = "depth"
22
+ _L2_LEAF_REG_KEY = "l2_leaf_reg"
23
+ _BOOSTING_TYPE_KEY = "boosting_type"
24
+ _MODEL_TYPE_KEY = "model_type"
25
+
26
+
27
+ class CatboostModel(Model):
28
+ """A class that uses Catboost as a model."""
29
+
30
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
31
+
32
+ _catboost: CatBoost | None
33
+ _iterations: None | int
34
+ _learning_rate: None | float
35
+ _depth: None | int
36
+ _l2_leaf_reg: None | float
37
+ _boosting_type: None | str
38
+ _model_type: None | ModelType
39
+
40
+ @classmethod
41
+ def name(cls) -> str:
42
+ return "catboost"
43
+
44
+ def __init__(self) -> None:
45
+ super().__init__()
46
+ self._catboost = None
47
+ self._iterations = None
48
+ self._learning_rate = None
49
+ self._depth = None
50
+ self._l2_leaf_reg = None
51
+ self._boosting_type = None
52
+ self._model_type = None
53
+
54
+ @property
55
+ def estimator(self) -> Any:
56
+ return self._provide_catboost()
57
+
58
+ def pre_fit(
59
+ self,
60
+ df: pd.DataFrame,
61
+ y: pd.Series | pd.DataFrame | None,
62
+ eval_x: pd.DataFrame | None = None,
63
+ eval_y: pd.Series | pd.DataFrame | None = None,
64
+ ):
65
+ if y is None:
66
+ raise ValueError("y is null.")
67
+ self._model_type = determine_model_type(y)
68
+ return {
69
+ EVAL_SET: (eval_x, eval_y),
70
+ "cat_features": df.select_dtypes(include="category").columns.tolist(),
71
+ ORIGINAL_X: df,
72
+ }
73
+
74
+ def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
75
+ self._iterations = trial.suggest_int(_ITERATIONS_KEY, 100, 10000)
76
+ self._learning_rate = trial.suggest_float(_LEARNING_RATE_KEY, 0.001, 0.3)
77
+ self._depth = trial.suggest_int(_DEPTH_KEY, 1, 12)
78
+ self._l2_leaf_reg = trial.suggest_float(_L2_LEAF_REG_KEY, 3.0, 50.0)
79
+ self._boosting_type = trial.suggest_categorical(
80
+ _BOOSTING_TYPE_KEY, ["Ordered", "Plain"]
81
+ )
82
+
83
+ def load(self, folder: str) -> None:
84
+ with open(
85
+ os.path.join(folder, _MODEL_PARAMS_FILENAME), encoding="utf8"
86
+ ) as handle:
87
+ params = json.load(handle)
88
+ self._iterations = params[_ITERATIONS_KEY]
89
+ self._learning_rate = params[_LEARNING_RATE_KEY]
90
+ self._depth = params[_DEPTH_KEY]
91
+ self._l2_leaf_reg = params[_L2_LEAF_REG_KEY]
92
+ self._boosting_type = params[_BOOSTING_TYPE_KEY]
93
+ self._model_type = ModelType(params[_MODEL_TYPE_KEY])
94
+ catboost = self._provide_catboost()
95
+ catboost.load_model(os.path.join(folder, _MODEL_FILENAME))
96
+
97
+ def save(self, folder: str) -> None:
98
+ with open(
99
+ os.path.join(folder, _MODEL_PARAMS_FILENAME), "w", encoding="utf8"
100
+ ) as handle:
101
+ json.dump(
102
+ {
103
+ _ITERATIONS_KEY: self._iterations,
104
+ _LEARNING_RATE_KEY: self._learning_rate,
105
+ _DEPTH_KEY: self._depth,
106
+ _L2_LEAF_REG_KEY: self._l2_leaf_reg,
107
+ _BOOSTING_TYPE_KEY: self._boosting_type,
108
+ _MODEL_TYPE_KEY: str(self._model_type),
109
+ },
110
+ handle,
111
+ )
112
+ catboost = self._provide_catboost()
113
+ catboost.save_model(os.path.join(folder, _MODEL_FILENAME))
114
+
115
+ def fit(
116
+ self,
117
+ df: pd.DataFrame,
118
+ y: pd.Series | pd.DataFrame | None = None,
119
+ w: pd.Series | None = None,
120
+ eval_x: pd.DataFrame | None = None,
121
+ eval_y: pd.Series | pd.DataFrame | None = None,
122
+ ) -> Self:
123
+ if y is None:
124
+ raise ValueError("y is null.")
125
+ self._model_type = determine_model_type(y)
126
+ catboost = self._provide_catboost()
127
+
128
+ train_pool = Pool(
129
+ df,
130
+ label=y,
131
+ weight=w,
132
+ )
133
+ eval_pool = Pool(
134
+ eval_x,
135
+ label=eval_y,
136
+ )
137
+ catboost.fit(
138
+ train_pool,
139
+ early_stopping_rounds=100,
140
+ verbose=False,
141
+ metric_period=100,
142
+ eval_set=eval_pool,
143
+ )
144
+ return self
145
+
146
+ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
147
+ pred_pool = Pool(df)
148
+ catboost = self._provide_catboost()
149
+ pred = catboost.predict(pred_pool)
150
+ df = pd.DataFrame(
151
+ index=df.index,
152
+ data={
153
+ PREDICTION_COLUMN: pred.flatten(),
154
+ },
155
+ )
156
+ if self._model_type != ModelType.REGRESSION:
157
+ proba = catboost.predict_proba(pred_pool) # type: ignore
158
+ for i in range(proba.shape[1]):
159
+ df[f"{PROBABILITY_COLUMN_PREFIX}{i}"] = proba[:, i]
160
+ return df
161
+
162
+ def _provide_catboost(self) -> CatBoost:
163
+ catboost = self._catboost
164
+ if catboost is None:
165
+ match self._model_type:
166
+ case ModelType.BINARY:
167
+ catboost = CatBoostClassifierWrapper(
168
+ iterations=self._iterations,
169
+ learning_rate=self._learning_rate,
170
+ depth=self._depth,
171
+ l2_leaf_reg=self._l2_leaf_reg,
172
+ boosting_type=self._boosting_type,
173
+ early_stopping_rounds=100,
174
+ metric_period=100,
175
+ )
176
+ case ModelType.REGRESSION:
177
+ catboost = CatBoostRegressorWrapper(
178
+ iterations=self._iterations,
179
+ learning_rate=self._learning_rate,
180
+ depth=self._depth,
181
+ l2_leaf_reg=self._l2_leaf_reg,
182
+ boosting_type=self._boosting_type,
183
+ early_stopping_rounds=100,
184
+ metric_period=100,
185
+ )
186
+ case ModelType.BINNED_BINARY:
187
+ catboost = CatBoostClassifierWrapper(
188
+ iterations=self._iterations,
189
+ learning_rate=self._learning_rate,
190
+ depth=self._depth,
191
+ l2_leaf_reg=self._l2_leaf_reg,
192
+ boosting_type=self._boosting_type,
193
+ early_stopping_rounds=100,
194
+ metric_period=100,
195
+ )
196
+ case ModelType.MULTI_CLASSIFICATION:
197
+ catboost = CatBoostClassifierWrapper(
198
+ iterations=self._iterations,
199
+ learning_rate=self._learning_rate,
200
+ depth=self._depth,
201
+ l2_leaf_reg=self._l2_leaf_reg,
202
+ boosting_type=self._boosting_type,
203
+ early_stopping_rounds=100,
204
+ metric_period=100,
205
+ )
206
+ self._catboost = catboost
207
+ if catboost is None:
208
+ raise ValueError("catboost is null")
209
+ return catboost
@@ -0,0 +1,13 @@
1
+ """A wrapper for catboost regressor to handle some edge cases."""
2
+
3
+ from catboost import CatBoostRegressor # type: ignore
4
+
5
+ from .catboost_kwargs import handle_fit_kwargs
6
+
7
+
8
+ class CatBoostRegressorWrapper(CatBoostRegressor):
9
+ """A wrapper for the catboost regressor."""
10
+
11
+ def fit(self, *args, **kwargs):
12
+ kwargs = handle_fit_kwargs(*args, **kwargs)
13
+ return super().fit(*args, **kwargs)
@@ -2,6 +2,8 @@
2
2
 
3
3
  from typing import Any
4
4
 
5
+ import pandas as pd
6
+
5
7
  from ..fit import Fit
6
8
  from ..params import Params
7
9
 
@@ -21,3 +23,13 @@ class Model(Params, Fit):
21
23
  def estimator(self) -> Any:
22
24
  """The estimator backing the model."""
23
25
  raise NotImplementedError("estimator not implemented in parent class.")
26
+
27
+ def pre_fit(
28
+ self,
29
+ df: pd.DataFrame,
30
+ y: pd.Series | pd.DataFrame | None,
31
+ eval_x: pd.DataFrame | None = None,
32
+ eval_y: pd.Series | pd.DataFrame | None = None,
33
+ ) -> dict[str, Any]:
34
+ """A call to make sure the model is prepared for the target type."""
35
+ raise NotImplementedError("pre_fit not implemented in parent class.")
@@ -20,6 +20,8 @@ _MODELS = {
20
20
  class ModelRouter(Model):
21
21
  """A router that routes to a different weights class."""
22
22
 
23
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
24
+
23
25
  _model: Model | None
24
26
 
25
27
  def __init__(self) -> None:
@@ -37,10 +39,23 @@ class ModelRouter(Model):
37
39
  raise ValueError("model is null")
38
40
  return model.estimator
39
41
 
42
+ def pre_fit(
43
+ self,
44
+ df: pd.DataFrame,
45
+ y: pd.Series | pd.DataFrame | None,
46
+ eval_x: pd.DataFrame | None = None,
47
+ eval_y: pd.Series | pd.DataFrame | None = None,
48
+ ) -> dict[str, Any]:
49
+ model = self._model
50
+ if model is None:
51
+ raise ValueError("model is null")
52
+ return model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y)
53
+
40
54
  def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
41
55
  self._model = _MODELS[
42
56
  trial.suggest_categorical("model", list(_MODELS.keys()))
43
57
  ]()
58
+ self._model.set_options(trial)
44
59
 
45
60
  def load(self, folder: str) -> None:
46
61
  with open(os.path.join(folder, _MODEL_ROUTER_FILE), encoding="utf8") as handle:
@@ -69,11 +84,13 @@ class ModelRouter(Model):
69
84
  df: pd.DataFrame,
70
85
  y: pd.Series | pd.DataFrame | None = None,
71
86
  w: pd.Series | None = None,
87
+ eval_x: pd.DataFrame | None = None,
88
+ eval_y: pd.Series | pd.DataFrame | None = None,
72
89
  ) -> Self:
73
90
  model = self._model
74
91
  if model is None:
75
92
  raise ValueError("model is null")
76
- model.fit(df, y=y, w=w)
93
+ model.fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y)
77
94
  return self
78
95
 
79
96
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -1,17 +1,17 @@
1
1
  """An enum to define the model type."""
2
2
 
3
- from enum import Enum
3
+ from enum import StrEnum, auto
4
4
 
5
5
  import pandas as pd
6
6
 
7
7
 
8
- class ModelType(Enum):
8
+ class ModelType(StrEnum):
9
9
  """The type of model being run."""
10
10
 
11
- BINARY = 1
12
- REGRESSION = 2
13
- BINNED_BINARY = 3
14
- MULTI_CLASSIFICATION = 4
11
+ BINARY = auto()
12
+ REGRESSION = auto()
13
+ BINNED_BINARY = auto()
14
+ MULTI_CLASSIFICATION = auto()
15
15
 
16
16
 
17
17
  def determine_model_type(y: pd.Series | pd.DataFrame) -> ModelType:
@@ -15,6 +15,8 @@ from .reducer import Reducer
15
15
  class BaseSelectorReducer(Reducer):
16
16
  """A class that uses the base selector from the feature engine."""
17
17
 
18
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
19
+
18
20
  def __init__(self, base_selector: BaseSelector, file_name: str) -> None:
19
21
  super().__init__()
20
22
  self._base_selector = base_selector
@@ -40,6 +42,8 @@ class BaseSelectorReducer(Reducer):
40
42
  df: pd.DataFrame,
41
43
  y: pd.Series | pd.DataFrame | None = None,
42
44
  w: pd.Series | None = None,
45
+ eval_x: pd.DataFrame | None = None,
46
+ eval_y: pd.Series | pd.DataFrame | None = None,
43
47
  ) -> Self:
44
48
  try:
45
49
  self._base_selector.fit(df) # type: ignore
@@ -20,6 +20,8 @@ _REDUCERS_KEY = "reducers"
20
20
  class CombinedReducer(Reducer):
21
21
  """A reducer that combines a series of reducers."""
22
22
 
23
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
24
+
23
25
  def __init__(self):
24
26
  super().__init__()
25
27
  self._reducers = [
@@ -73,6 +75,8 @@ class CombinedReducer(Reducer):
73
75
  df: pd.DataFrame,
74
76
  y: pd.Series | pd.DataFrame | None = None,
75
77
  w: pd.Series | None = None,
78
+ eval_x: pd.DataFrame | None = None,
79
+ eval_y: pd.Series | pd.DataFrame | None = None,
76
80
  ) -> Self:
77
81
  for reducer in self._reducers:
78
82
  df = reducer.fit_transform(df)
@@ -11,6 +11,8 @@ from .reducer import Reducer
11
11
  class NonNumericReducer(Reducer):
12
12
  """A class that removes non numeric columns from a dataframe."""
13
13
 
14
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
15
+
14
16
  @classmethod
15
17
  def name(cls) -> str:
16
18
  return "nonnumeric"
@@ -29,6 +31,8 @@ class NonNumericReducer(Reducer):
29
31
  df: pd.DataFrame,
30
32
  y: pd.Series | pd.DataFrame | None = None,
31
33
  w: pd.Series | None = None,
34
+ eval_x: pd.DataFrame | None = None,
35
+ eval_y: pd.Series | pd.DataFrame | None = None,
32
36
  ) -> Self:
33
37
  return self
34
38
 
@@ -7,6 +7,7 @@ from typing import Self
7
7
  import joblib # type: ignore
8
8
  import optuna
9
9
  import pandas as pd
10
+ import sklearn # type: ignore
10
11
  from sklearn.feature_selection import RFE # type: ignore
11
12
 
12
13
  from ..fit import Fit
@@ -19,24 +20,20 @@ _SELECTOR_FILE = "selector.joblib"
19
20
  class Selector(Params, Fit):
20
21
  """The selector class."""
21
22
 
22
- def __init__(self, model: Model, total_features: int):
23
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
24
+
25
+ _selector: RFE | None
26
+
27
+ def __init__(self, model: Model):
23
28
  super().__init__()
24
29
  self._model = model
25
30
  self._feature_ratio = 0.0
26
31
  self._steps = 0
27
- n_features_to_select = max(1, total_features * self._feature_ratio)
28
- self._selector = RFE(
29
- model.estimator,
30
- n_features_to_select=n_features_to_select,
31
- step=self._steps,
32
- verbose=1,
33
- )
32
+ self._selector = None
34
33
 
35
34
  def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
36
35
  self._feature_ratio = trial.suggest_float("feature_ratio", 0.0, 1.0)
37
- steps = trial.suggest_int("steps", 1, 16)
38
- self._steps = steps
39
- self._selector.step = steps
36
+ self._steps = trial.suggest_int("steps", 1, 16)
40
37
 
41
38
  def load(self, folder: str) -> None:
42
39
  self._selector = joblib.load(os.path.join(folder, _SELECTOR_FILE))
@@ -49,20 +46,35 @@ class Selector(Params, Fit):
49
46
  df: pd.DataFrame,
50
47
  y: pd.Series | pd.DataFrame | None = None,
51
48
  w: pd.Series | None = None,
49
+ eval_x: pd.DataFrame | None = None,
50
+ eval_y: pd.Series | pd.DataFrame | None = None,
52
51
  ) -> Self:
52
+ sklearn.set_config(enable_metadata_routing=False)
53
+ model_kwargs = self._model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y)
53
54
  if not isinstance(y, pd.Series):
54
55
  raise ValueError("y is not a series.")
55
-
56
+ n_features_to_select = max(1, int(len(df.columns) * self._feature_ratio))
57
+ self._selector = RFE(
58
+ self._model.estimator,
59
+ n_features_to_select=n_features_to_select,
60
+ step=max(
61
+ 1,
62
+ int((len(df.columns) - n_features_to_select) / self._steps),
63
+ ),
64
+ )
56
65
  try:
57
- self._selector.fit(df, y=y, sample_weight=w)
66
+ self._selector.fit(df, y=y, sample_weight=w, **model_kwargs)
58
67
  except ValueError as exc:
59
68
  # Catch issues with 1 feature as a reduction target.
60
69
  logging.warning(str(exc))
61
70
  return self
62
71
 
63
72
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
73
+ selector = self._selector
74
+ if selector is None:
75
+ raise ValueError("selector is null.")
64
76
  try:
65
- return df[self._selector.get_feature_names_out()]
77
+ return df[selector.get_feature_names_out()]
66
78
  except AttributeError as exc:
67
79
  # Catch issues with 1 feature as a reduction target.
68
80
  logging.warning(str(exc))
@@ -11,7 +11,7 @@ from typing import Self
11
11
  import optuna
12
12
  import pandas as pd
13
13
  import tqdm
14
- from sklearn.metrics import accuracy_score, f1_score # type: ignore
14
+ from sklearn.metrics import f1_score, r2_score # type: ignore
15
15
 
16
16
  from .calibrator.calibrator_router import CalibratorRouter
17
17
  from .exceptions import WavetrainException
@@ -158,6 +158,8 @@ class Trainer(Fit):
158
158
  df: pd.DataFrame,
159
159
  y: pd.Series | pd.DataFrame | None = None,
160
160
  w: pd.Series | None = None,
161
+ eval_x: pd.DataFrame | None = None,
162
+ eval_y: pd.Series | pd.DataFrame | None = None,
161
163
  ) -> Self:
162
164
  """Perform a train on the data to fit to the targets."""
163
165
  if y is None:
@@ -215,12 +217,14 @@ class Trainer(Fit):
215
217
  model.set_options(trial)
216
218
 
217
219
  # Train
218
- selector = Selector(model, len(x_train.columns.values))
220
+ selector = Selector(model)
219
221
  selector.set_options(trial)
220
- selector.fit(x_train, y=y_train, w=w)
222
+ selector.fit(x_train, y=y_train, w=w, eval_x=x_test, eval_y=y_test)
221
223
  x_train = selector.transform(x_train)
222
224
  x_test = selector.transform(x_test)
223
- x_pred = model.fit_transform(x_train, y=y_train)
225
+ x_pred = model.fit_transform(
226
+ x_train, y=y_train, w=w, eval_x=x_test, eval_y=y_test
227
+ )
224
228
 
225
229
  # Calibrate
226
230
  calibrator = CalibratorRouter(model)
@@ -243,8 +247,8 @@ class Trainer(Fit):
243
247
  y_pred = model.transform(x_test)
244
248
  y_pred = calibrator.transform(y_pred)
245
249
  if determine_model_type(y_series) == ModelType.REGRESSION:
246
- return accuracy_score(y_test, y_pred[[PREDICTION_COLUMN]])
247
- return f1_score(y_test, y_pred[[PREDICTION_COLUMN]])
250
+ return float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
251
+ return float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
248
252
  except WavetrainException as exc:
249
253
  logging.warning(str(exc))
250
254
  return -1.0
@@ -286,9 +290,15 @@ class Trainer(Fit):
286
290
  train_len = len(df[dt_index < start_test_index])
287
291
  test_len = len(df.loc[start_test_index:start_validation_index])
288
292
 
293
+ last_processed_dt = None
289
294
  for count, test_idx in tqdm.tqdm(
290
- enumerate(df[dt_index >= start_test_index].index)
295
+ enumerate(test_dt_index[test_dt_index >= start_test_index])
291
296
  ):
297
+ if (
298
+ last_processed_dt is not None
299
+ and test_idx < last_processed_dt + self._walkforward_timedelta
300
+ ):
301
+ continue
292
302
  test_dt = test_idx.to_pydatetime()
293
303
  found = False
294
304
  for trial in study.trials:
@@ -373,7 +383,7 @@ class Trainer(Fit):
373
383
  model = ModelRouter()
374
384
  model.load(folder)
375
385
 
376
- selector = Selector(model, len(df.columns.values))
386
+ selector = Selector(model)
377
387
  selector.load(folder)
378
388
 
379
389
  calibrator = CalibratorRouter(model)
@@ -14,6 +14,8 @@ from .weights import WEIGHTS_COLUMN, Weights
14
14
  class ClassWeights(Weights):
15
15
  """Class weight class."""
16
16
 
17
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
18
+
17
19
  _class_weights: dict[Any, float]
18
20
 
19
21
  def __init__(self) -> None:
@@ -39,6 +41,8 @@ class ClassWeights(Weights):
39
41
  df: pd.DataFrame,
40
42
  y: pd.Series | pd.DataFrame | None = None,
41
43
  w: pd.Series | None = None,
44
+ eval_x: pd.DataFrame | None = None,
45
+ eval_y: pd.Series | pd.DataFrame | None = None,
42
46
  ) -> Self:
43
47
  if not isinstance(y, pd.Series):
44
48
  raise ValueError("y is not a series.")
@@ -13,6 +13,8 @@ from .weights_router import WeightsRouter
13
13
  class CombinedWeights(Weights):
14
14
  """A weights class that combines multiple weights."""
15
15
 
16
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
17
+
16
18
  def __init__(self) -> None:
17
19
  super().__init__()
18
20
  self._weights = [WeightsRouter(), ClassWeights()]
@@ -38,6 +40,8 @@ class CombinedWeights(Weights):
38
40
  df: pd.DataFrame,
39
41
  y: pd.Series | pd.DataFrame | None = None,
40
42
  w: pd.Series | None = None,
43
+ eval_x: pd.DataFrame | None = None,
44
+ eval_y: pd.Series | pd.DataFrame | None = None,
41
45
  ) -> Self:
42
46
  for weights in self._weights:
43
47
  weights.fit(df, y=y)
@@ -12,6 +12,8 @@ from .weights import WEIGHTS_COLUMN, Weights
12
12
  class ExponentialWeights(Weights):
13
13
  """Exponential weight class."""
14
14
 
15
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
16
+
15
17
  @classmethod
16
18
  def name(cls) -> str:
17
19
  """The name of the weight class."""
@@ -31,6 +33,8 @@ class ExponentialWeights(Weights):
31
33
  df: pd.DataFrame,
32
34
  y: pd.Series | pd.DataFrame | None = None,
33
35
  w: pd.Series | None = None,
36
+ eval_x: pd.DataFrame | None = None,
37
+ eval_y: pd.Series | pd.DataFrame | None = None,
34
38
  ) -> Self:
35
39
  return self
36
40
 
@@ -12,7 +12,7 @@ from .weights import WEIGHTS_COLUMN, Weights
12
12
  class LinearWeights(Weights):
13
13
  """Linear weight class."""
14
14
 
15
- # pylint: disable=duplicate-code
15
+ # pylint: disable=duplicate-code,too-many-positional-arguments,too-many-arguments
16
16
 
17
17
  @classmethod
18
18
  def name(cls) -> str:
@@ -33,6 +33,8 @@ class LinearWeights(Weights):
33
33
  df: pd.DataFrame,
34
34
  y: pd.Series | pd.DataFrame | None = None,
35
35
  w: pd.Series | None = None,
36
+ eval_x: pd.DataFrame | None = None,
37
+ eval_y: pd.Series | pd.DataFrame | None = None,
36
38
  ) -> Self:
37
39
  return self
38
40
 
@@ -12,7 +12,7 @@ from .weights import WEIGHTS_COLUMN, Weights
12
12
  class NoopWeights(Weights):
13
13
  """Noop weight class."""
14
14
 
15
- # pylint: disable=duplicate-code
15
+ # pylint: disable=duplicate-code,too-many-positional-arguments,too-many-arguments
16
16
 
17
17
  @classmethod
18
18
  def name(cls) -> str:
@@ -33,6 +33,8 @@ class NoopWeights(Weights):
33
33
  df: pd.DataFrame,
34
34
  y: pd.Series | pd.DataFrame | None = None,
35
35
  w: pd.Series | None = None,
36
+ eval_x: pd.DataFrame | None = None,
37
+ eval_y: pd.Series | pd.DataFrame | None = None,
36
38
  ) -> Self:
37
39
  return self
38
40
 
@@ -13,7 +13,7 @@ from .weights import WEIGHTS_COLUMN, Weights
13
13
  class SigmoidWeights(Weights):
14
14
  """Sigmoid weight class."""
15
15
 
16
- # pylint: disable=duplicate-code
16
+ # pylint: disable=duplicate-code,too-many-positional-arguments,too-many-arguments
17
17
 
18
18
  @classmethod
19
19
  def name(cls) -> str:
@@ -34,6 +34,8 @@ class SigmoidWeights(Weights):
34
34
  df: pd.DataFrame,
35
35
  y: pd.Series | pd.DataFrame | None = None,
36
36
  w: pd.Series | None = None,
37
+ eval_x: pd.DataFrame | None = None,
38
+ eval_y: pd.Series | pd.DataFrame | None = None,
37
39
  ) -> Self:
38
40
  return self
39
41
 
@@ -26,6 +26,8 @@ _WEIGHTS = {
26
26
  class WeightsRouter(Weights):
27
27
  """A router that routes to a different weights class."""
28
28
 
29
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
30
+
29
31
  _weights: Weights | None
30
32
 
31
33
  def __init__(self) -> None:
@@ -71,6 +73,8 @@ class WeightsRouter(Weights):
71
73
  df: pd.DataFrame,
72
74
  y: pd.Series | pd.DataFrame | None = None,
73
75
  w: pd.Series | None = None,
76
+ eval_x: pd.DataFrame | None = None,
77
+ eval_y: pd.Series | pd.DataFrame | None = None,
74
78
  ) -> Self:
75
79
  return self
76
80
 
@@ -18,6 +18,8 @@ _LOOKBACK_KEY = "lookback"
18
18
  class Windower(Params, Fit):
19
19
  """The windower class."""
20
20
 
21
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
22
+
21
23
  _lookback_ratio: float | None
22
24
 
23
25
  def __init__(self, dt_column: str | None):
@@ -48,6 +50,8 @@ class Windower(Params, Fit):
48
50
  df: pd.DataFrame,
49
51
  y: pd.Series | pd.DataFrame | None = None,
50
52
  w: pd.Series | None = None,
53
+ eval_x: pd.DataFrame | None = None,
54
+ eval_y: pd.Series | pd.DataFrame | None = None,
51
55
  ) -> Self:
52
56
  lookback_ratio = self._lookback_ratio
53
57
  if lookback_ratio is None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.4
3
+ Version: 0.0.6
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -21,7 +21,6 @@ Requires-Dist: scipy>=1.15.2
21
21
  Requires-Dist: catboost>=1.2.7
22
22
  Requires-Dist: venn-abers>=1.4.6
23
23
  Requires-Dist: mapie>=0.9.2
24
- Requires-Dist: shapiq>=1.2.2
25
24
 
26
25
  # wavetrainer
27
26
 
@@ -49,7 +48,6 @@ Python 3.11.6:
49
48
  - [catboost](https://catboost.ai/)
50
49
  - [venn-abers](https://github.com/ip200/venn-abers)
51
50
  - [mapie](https://mapie.readthedocs.io/en/stable/)
52
- - [shapiq](https://github.com/mmschlk/shapiq)
53
51
 
54
52
  ## Raison D'être :thought_balloon:
55
53
 
@@ -9,7 +9,6 @@ wavetrainer/__init__.py
9
9
  wavetrainer/create.py
10
10
  wavetrainer/exceptions.py
11
11
  wavetrainer/fit.py
12
- wavetrainer/load.py
13
12
  wavetrainer/model_type.py
14
13
  wavetrainer/params.py
15
14
  wavetrainer/trainer.py
@@ -25,7 +24,10 @@ wavetrainer/calibrator/calibrator_router.py
25
24
  wavetrainer/calibrator/mapie_calibrator.py
26
25
  wavetrainer/calibrator/vennabers_calibrator.py
27
26
  wavetrainer/model/__init__.py
27
+ wavetrainer/model/catboost_classifier_wrap.py
28
+ wavetrainer/model/catboost_kwargs.py
28
29
  wavetrainer/model/catboost_model.py
30
+ wavetrainer/model/catboost_regressor_wrap.py
29
31
  wavetrainer/model/model.py
30
32
  wavetrainer/model/model_router.py
31
33
  wavetrainer/reducer/__init__.py
@@ -8,4 +8,3 @@ scipy>=1.15.2
8
8
  catboost>=1.2.7
9
9
  venn-abers>=1.4.6
10
10
  mapie>=0.9.2
11
- shapiq>=1.2.2
@@ -1,10 +0,0 @@
1
- """The wavetrain main module."""
2
-
3
- from .create import create
4
- from .load import load
5
-
6
- __VERSION__ = "0.0.4"
7
- __all__ = (
8
- "create",
9
- "load",
10
- )
@@ -1,8 +0,0 @@
1
- """The function for loading the trainer state from disk."""
2
-
3
- from .trainer import Trainer
4
-
5
-
6
- def load(folder: str) -> Trainer:
7
- """Loads the trainer from the folder."""
8
- raise NotImplementedError("load isn't implemented.")
@@ -1,80 +0,0 @@
1
- """A model that wraps catboost."""
2
-
3
- import os
4
- from typing import Any, Self
5
-
6
- import optuna
7
- import pandas as pd
8
- from catboost import CatBoostClassifier, Pool # type: ignore
9
-
10
- from .model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
11
-
12
- _MODEL_FILENAME = "model.cbm"
13
-
14
-
15
- class CatboostModel(Model):
16
- """A class that uses Catboost as a model."""
17
-
18
- @classmethod
19
- def name(cls) -> str:
20
- return "catboost"
21
-
22
- def __init__(self) -> None:
23
- super().__init__()
24
- self._catboost = CatBoostClassifier()
25
-
26
- @property
27
- def estimator(self) -> Any:
28
- return self._catboost
29
-
30
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
31
- iterations = trial.suggest_int("iterations", 100, 10000)
32
- learning_rate = trial.suggest_float("learning_rate", 0.001, 0.3)
33
- depth = trial.suggest_int("depth", 1, 12)
34
- l2_leaf_reg = trial.suggest_float("l2_leaf_reg", 3.0, 50.0)
35
- boosting_type = trial.suggest_categorical("boosting_type", ["Ordered", "Plain"])
36
- self._catboost.set_params(
37
- iterations=iterations,
38
- learning_rate=learning_rate,
39
- depth=depth,
40
- l2_leaf_reg=l2_leaf_reg,
41
- boosting_type=boosting_type,
42
- early_stopping_rounds=100,
43
- )
44
-
45
- def load(self, folder: str) -> None:
46
- self._catboost.load_model(os.path.join(folder, _MODEL_FILENAME))
47
-
48
- def save(self, folder: str) -> None:
49
- self._catboost.save_model(os.path.join(folder, _MODEL_FILENAME))
50
-
51
- def fit(
52
- self,
53
- df: pd.DataFrame,
54
- y: pd.Series | pd.DataFrame | None = None,
55
- w: pd.Series | None = None,
56
- ) -> Self:
57
- train_pool = Pool(
58
- df,
59
- label=y,
60
- weight=w,
61
- )
62
- self._catboost.fit(
63
- train_pool,
64
- early_stopping_rounds=100,
65
- )
66
- return self
67
-
68
- def transform(self, df: pd.DataFrame) -> pd.DataFrame:
69
- pred_pool = Pool(df)
70
- pred = self._catboost.predict(pred_pool)
71
- proba = self._catboost.predict_proba(pred_pool)
72
- df = pd.DataFrame(
73
- index=df.index,
74
- data={
75
- PREDICTION_COLUMN: pred.flatten(),
76
- },
77
- )
78
- for i in range(proba.shape[1]):
79
- df[f"{PROBABILITY_COLUMN_PREFIX}{i}"] = proba[:, i]
80
- return df
File without changes
File without changes
File without changes