wavetrainer 0.0.5__tar.gz → 0.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. {wavetrainer-0.0.5/wavetrainer.egg-info → wavetrainer-0.0.7}/PKG-INFO +1 -3
  2. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/README.md +0 -1
  3. wavetrainer-0.0.5/wavetrainer.egg-info/requires.txt → wavetrainer-0.0.7/requirements.txt +1 -2
  4. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/setup.py +1 -1
  5. wavetrainer-0.0.7/tests/model/catboost_kwargs_test.py +28 -0
  6. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/tests/trainer_test.py +2 -1
  7. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/__init__.py +1 -1
  8. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/calibrator/calibrator_router.py +5 -0
  9. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/calibrator/mapie_calibrator.py +24 -13
  10. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/calibrator/vennabers_calibrator.py +4 -0
  11. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/fit.py +8 -1
  12. wavetrainer-0.0.7/wavetrainer/model/catboost_classifier_wrap.py +15 -0
  13. wavetrainer-0.0.7/wavetrainer/model/catboost_kwargs.py +50 -0
  14. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/model/catboost_model.py +30 -7
  15. wavetrainer-0.0.7/wavetrainer/model/catboost_regressor_wrap.py +13 -0
  16. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/model/model.py +7 -1
  17. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/model/model_router.py +13 -3
  18. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/reducer/base_selector_reducer.py +4 -0
  19. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/reducer/combined_reducer.py +4 -0
  20. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/reducer/nonnumeric_reducer.py +4 -0
  21. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/selector/selector.py +8 -2
  22. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/trainer.py +9 -5
  23. wavetrainer-0.0.7/wavetrainer/weights/__init__.py +0 -0
  24. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/weights/class_weights.py +4 -0
  25. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/weights/combined_weights.py +4 -0
  26. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/weights/exponential_weights.py +4 -0
  27. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/weights/linear_weights.py +3 -1
  28. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/weights/noop_weights.py +3 -1
  29. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/weights/sigmoid_weights.py +3 -1
  30. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/weights/weights_router.py +4 -0
  31. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/windower/windower.py +4 -0
  32. {wavetrainer-0.0.5 → wavetrainer-0.0.7/wavetrainer.egg-info}/PKG-INFO +1 -3
  33. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer.egg-info/SOURCES.txt +5 -0
  34. wavetrainer-0.0.5/requirements.txt → wavetrainer-0.0.7/wavetrainer.egg-info/requires.txt +0 -1
  35. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/LICENSE +0 -0
  36. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/MANIFEST.in +0 -0
  37. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/setup.cfg +0 -0
  38. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/tests/__init__.py +0 -0
  39. {wavetrainer-0.0.5/wavetrainer/weights → wavetrainer-0.0.7/tests/model}/__init__.py +0 -0
  40. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/calibrator/__init__.py +0 -0
  41. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/calibrator/calibrator.py +0 -0
  42. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/create.py +0 -0
  43. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/exceptions.py +0 -0
  44. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/model/__init__.py +0 -0
  45. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/model_type.py +0 -0
  46. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/params.py +0 -0
  47. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/reducer/__init__.py +0 -0
  48. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/reducer/constant_reducer.py +0 -0
  49. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/reducer/correlation_reducer.py +0 -0
  50. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  51. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/reducer/reducer.py +0 -0
  52. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/selector/__init__.py +0 -0
  53. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/weights/weights.py +0 -0
  54. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer/windower/__init__.py +0 -0
  55. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer.egg-info/dependency_links.txt +0 -0
  56. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer.egg-info/not-zip-safe +0 -0
  57. {wavetrainer-0.0.5 → wavetrainer-0.0.7}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.5
3
+ Version: 0.0.7
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -21,7 +21,6 @@ Requires-Dist: scipy>=1.15.2
21
21
  Requires-Dist: catboost>=1.2.7
22
22
  Requires-Dist: venn-abers>=1.4.6
23
23
  Requires-Dist: mapie>=0.9.2
24
- Requires-Dist: shapiq>=1.2.2
25
24
 
26
25
  # wavetrainer
27
26
 
@@ -49,7 +48,6 @@ Python 3.11.6:
49
48
  - [catboost](https://catboost.ai/)
50
49
  - [venn-abers](https://github.com/ip200/venn-abers)
51
50
  - [mapie](https://mapie.readthedocs.io/en/stable/)
52
- - [shapiq](https://github.com/mmschlk/shapiq)
53
51
 
54
52
  ## Raison D'être :thought_balloon:
55
53
 
@@ -24,7 +24,6 @@ Python 3.11.6:
24
24
  - [catboost](https://catboost.ai/)
25
25
  - [venn-abers](https://github.com/ip200/venn-abers)
26
26
  - [mapie](https://mapie.readthedocs.io/en/stable/)
27
- - [shapiq](https://github.com/mmschlk/shapiq)
28
27
 
29
28
  ## Raison D'être :thought_balloon:
30
29
 
@@ -7,5 +7,4 @@ numpy>=1.26.4
7
7
  scipy>=1.15.2
8
8
  catboost>=1.2.7
9
9
  venn-abers>=1.4.6
10
- mapie>=0.9.2
11
- shapiq>=1.2.2
10
+ mapie>=0.9.2
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
23
23
 
24
24
  setup(
25
25
  name='wavetrainer',
26
- version='0.0.5',
26
+ version='0.0.7',
27
27
  description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
28
  long_description=long_description,
29
29
  long_description_content_type='text/markdown',
@@ -0,0 +1,28 @@
1
+ """Tests for the catboost kwargs handler class."""
2
+ import unittest
3
+
4
+ import pandas as pd
5
+
6
+ from wavetrainer.model.catboost_kwargs import handle_fit_kwargs
7
+
8
+
9
+ class TestCatboostKwargs(unittest.TestCase):
10
+
11
+ def test_handle_fit_kwargs(self):
12
+ x_train = pd.DataFrame(data={
13
+ "thing": [0.0, 1.0, 2.0, 3.0, 4.0],
14
+ })
15
+ x_train["thing"] = x_train["thing"].astype('category')
16
+ y_train = pd.Series(data=[1.0, 2.0, 3.0, 4.0])
17
+ x_test = pd.DataFrame(data={
18
+ "thing": [0.0, 1.0, 2.0, 3.0, 4.0],
19
+ })
20
+ x_test["thing"] = x_test["thing"].astype('category')
21
+ y_test = pd.Series(data=[1.0, 2.0, 3.0, 4.0])
22
+ args, _ = handle_fit_kwargs(
23
+ x_train,
24
+ y_train,
25
+ eval_set=(x_test, y_test),
26
+ cat_features=x_train.select_dtypes(include="category").columns.tolist(),
27
+ )
28
+ assert len(args) == 2
@@ -20,10 +20,11 @@ class TestTrainer(unittest.TestCase):
20
20
  data={
21
21
  "column1": x_data,
22
22
  "column2": [(x * random.random()) + random.random() for x in x_data],
23
- "column3": [(x / random.random()) - random.random() for x in x_data],
23
+ "column3": [int(((x / random.random()) - random.random()) * 1000.0) for x in x_data],
24
24
  },
25
25
  index=x_index,
26
26
  )
27
+ df["column3"] = df["column3"].astype('category')
27
28
  y = pd.DataFrame(
28
29
  data={
29
30
  "y": [x % 2 == 0 for x in x_data],
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .create import create
4
4
 
5
- __VERSION__ = "0.0.5"
5
+ __VERSION__ = "0.0.7"
6
6
  __all__ = ("create",)
@@ -24,6 +24,8 @@ _CALIBRATORS = {
24
24
  class CalibratorRouter(Calibrator):
25
25
  """A router that routes to a different calibrator class."""
26
26
 
27
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
28
+
27
29
  _calibrator: Calibrator | None
28
30
 
29
31
  def __init__(self, model: Model):
@@ -66,7 +68,10 @@ class CalibratorRouter(Calibrator):
66
68
  df: pd.DataFrame,
67
69
  y: pd.Series | pd.DataFrame | None = None,
68
70
  w: pd.Series | None = None,
71
+ eval_x: pd.DataFrame | None = None,
72
+ eval_y: pd.Series | pd.DataFrame | None = None,
69
73
  ) -> Self:
74
+ # pylint: disable=no-else-return
70
75
  calibrator: Calibrator | None = None
71
76
  if determine_model_type(df) == ModelType.REGRESSION:
72
77
  calibrator = MAPIECalibrator(self._model)
@@ -1,11 +1,13 @@
1
1
  """A calibrator that implements MAPIE."""
2
2
 
3
+ import logging
3
4
  import os
4
5
  from typing import Self
5
6
 
6
7
  import joblib # type: ignore
7
8
  import optuna
8
9
  import pandas as pd
10
+ import sklearn # type: ignore
9
11
  from mapie.regression import MapieRegressor # type: ignore
10
12
 
11
13
  from ..model.model import PROBABILITY_COLUMN_PREFIX, Model
@@ -17,6 +19,8 @@ _CALIBRATOR_FILENAME = "mapie.joblib"
17
19
  class MAPIECalibrator(Calibrator):
18
20
  """A class that uses MAPIE as a calibrator."""
19
21
 
22
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
23
+
20
24
  def __init__(self, model: Model):
21
25
  super().__init__(model)
22
26
  self._mapie = MapieRegressor(model.estimator, method="plus")
@@ -39,27 +43,34 @@ class MAPIECalibrator(Calibrator):
39
43
  df: pd.DataFrame,
40
44
  y: pd.Series | pd.DataFrame | None = None,
41
45
  w: pd.Series | None = None,
46
+ eval_x: pd.DataFrame | None = None,
47
+ eval_y: pd.Series | pd.DataFrame | None = None,
42
48
  ) -> Self:
43
49
  mapie = self._mapie
44
50
  if mapie is None:
45
51
  raise ValueError("mapie is null")
46
52
  if y is None:
47
53
  raise ValueError("y is null")
54
+ if len(df) <= 5:
55
+ return self
48
56
  mapie.fit(df.to_numpy(), y.to_numpy())
49
57
  return self
50
58
 
51
59
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
52
- alpha = []
53
- for potential_alpha in [0.05, 0.32]:
54
- if len(df) > int(1.0 / potential_alpha):
55
- alpha.append(potential_alpha)
56
- if alpha:
57
- _, y_pis = self._mapie.predict(df, alpha=alpha)
58
- for i in range(y_pis.shape[1]):
59
- if i >= len(alpha):
60
- continue
61
- for ii in range(y_pis.shape[2]):
62
- alpha_val = alpha[i]
63
- values = y_pis[:, i, ii].flatten().tolist()
64
- df[f"{PROBABILITY_COLUMN_PREFIX}{alpha_val}_{ii == 1}"] = values
60
+ try:
61
+ alpha = []
62
+ for potential_alpha in [0.05, 0.32]:
63
+ if len(df) > int(1.0 / potential_alpha) + 1:
64
+ alpha.append(potential_alpha)
65
+ if alpha:
66
+ _, y_pis = self._mapie.predict(df, alpha=alpha)
67
+ for i in range(y_pis.shape[1]):
68
+ if i >= len(alpha):
69
+ continue
70
+ for ii in range(y_pis.shape[2]):
71
+ alpha_val = alpha[i]
72
+ values = y_pis[:, i, ii].flatten().tolist()
73
+ df[f"{PROBABILITY_COLUMN_PREFIX}{alpha_val}_{ii == 1}"] = values
74
+ except sklearn.exceptions.NotFittedError as exc: # type: ignore
75
+ logging.warning(str(exc))
65
76
  return df
@@ -17,6 +17,8 @@ _CALIBRATOR_FILENAME = "vennabers.joblib"
17
17
  class VennabersCalibrator(Calibrator):
18
18
  """A class that uses venn abers as a calibrator."""
19
19
 
20
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
21
+
20
22
  def __init__(self, model: Model):
21
23
  super().__init__(model)
22
24
  self._vennabers = VennAbers()
@@ -39,6 +41,8 @@ class VennabersCalibrator(Calibrator):
39
41
  df: pd.DataFrame,
40
42
  y: pd.Series | pd.DataFrame | None = None,
41
43
  w: pd.Series | None = None,
44
+ eval_x: pd.DataFrame | None = None,
45
+ eval_y: pd.Series | pd.DataFrame | None = None,
42
46
  ) -> Self:
43
47
  vennabers = self._vennabers
44
48
  if vennabers is None:
@@ -8,11 +8,15 @@ import pandas as pd
8
8
  class Fit:
9
9
  """The prototype fit class."""
10
10
 
11
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
12
+
11
13
  def fit(
12
14
  self,
13
15
  df: pd.DataFrame,
14
16
  y: pd.Series | pd.DataFrame | None = None,
15
17
  w: pd.Series | None = None,
18
+ eval_x: pd.DataFrame | None = None,
19
+ eval_y: pd.Series | pd.DataFrame | None = None,
16
20
  ) -> Self:
17
21
  """Fit the dataframe."""
18
22
  raise NotImplementedError("fit not implemented in parent class.")
@@ -25,6 +29,9 @@ class Fit:
25
29
  self,
26
30
  df: pd.DataFrame,
27
31
  y: pd.Series | pd.DataFrame | None = None,
32
+ w: pd.Series | None = None,
33
+ eval_x: pd.DataFrame | None = None,
34
+ eval_y: pd.Series | pd.DataFrame | None = None,
28
35
  ) -> pd.DataFrame:
29
36
  """Fit and then transform the dataframe."""
30
- return self.fit(df, y=y).transform(df)
37
+ return self.fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y).transform(df)
@@ -0,0 +1,15 @@
1
+ """A wrapper for catboost classifier to handle some edge cases."""
2
+
3
+ # pylint: disable=duplicate-code
4
+
5
+ from catboost import CatBoostClassifier # type: ignore
6
+
7
+ from .catboost_kwargs import handle_fit_kwargs
8
+
9
+
10
+ class CatBoostClassifierWrapper(CatBoostClassifier):
11
+ """A wrapper for the catboost classifier."""
12
+
13
+ def fit(self, *args, **kwargs):
14
+ args, kwargs = handle_fit_kwargs(*args, **kwargs)
15
+ return super().fit(*args, **kwargs)
@@ -0,0 +1,50 @@
1
+ """A list of constant catboost kwargs."""
2
+
3
+ from typing import Any
4
+
5
+ import numpy as np
6
+ from catboost import Pool # type: ignore
7
+
8
+ ORIGINAL_X_ARG_KEY = "original_x"
9
+ EVAL_SET_ARG_KEY = "eval_set"
10
+ CAT_FEATURES_ARG_KEY = "cat_features"
11
+
12
+
13
+ def handle_fit_kwargs(*args, **kwargs) -> tuple[tuple[Any, ...], dict[str, Any]]:
14
+ """Handles keyword args coming into a catboost fit method."""
15
+ if ORIGINAL_X_ARG_KEY in kwargs:
16
+ df = kwargs[ORIGINAL_X_ARG_KEY]
17
+ eval_x, eval_y = kwargs[EVAL_SET_ARG_KEY]
18
+ cat_features = kwargs[CAT_FEATURES_ARG_KEY]
19
+ args_list = list(args)
20
+ fit_x = args_list[0]
21
+ fix_x_cp = fit_x.copy()
22
+
23
+ # Stupid code to ensure eval is feature equivalent to train data
24
+ included_columns = []
25
+ for i in range(fix_x_cp.shape[1]):
26
+ arr_col_values = fix_x_cp[:, i]
27
+ for col in df.columns:
28
+ if col in included_columns:
29
+ continue
30
+ df_col_values = df[col].values
31
+ if np.allclose(df_col_values, arr_col_values, equal_nan=True):
32
+ included_columns.append(col)
33
+ break
34
+ # We also need to update cat_features or catboost will yell at us
35
+ cat_features = list(
36
+ set(list(kwargs.get(CAT_FEATURES_ARG_KEY, []))) & set(included_columns)
37
+ )
38
+ args_list[0] = df[included_columns]
39
+ args = tuple(args_list)
40
+
41
+ eval_x = eval_x[included_columns]
42
+ kwargs[EVAL_SET_ARG_KEY] = Pool(
43
+ eval_x,
44
+ label=eval_y,
45
+ cat_features=cat_features,
46
+ )
47
+ kwargs[CAT_FEATURES_ARG_KEY] = cat_features
48
+
49
+ del kwargs[ORIGINAL_X_ARG_KEY]
50
+ return args, kwargs
@@ -6,10 +6,13 @@ from typing import Any, Self
6
6
 
7
7
  import optuna
8
8
  import pandas as pd
9
- from catboost import CatBoostClassifier # type: ignore
10
- from catboost import CatBoost, CatBoostRegressor, Pool
9
+ from catboost import CatBoost, Pool # type: ignore
11
10
 
12
11
  from ..model_type import ModelType, determine_model_type
12
+ from .catboost_classifier_wrap import CatBoostClassifierWrapper
13
+ from .catboost_kwargs import (CAT_FEATURES_ARG_KEY, EVAL_SET_ARG_KEY,
14
+ ORIGINAL_X_ARG_KEY)
15
+ from .catboost_regressor_wrap import CatBoostRegressorWrapper
13
16
  from .model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
14
17
 
15
18
  _MODEL_FILENAME = "model.cbm"
@@ -25,6 +28,8 @@ _MODEL_TYPE_KEY = "model_type"
25
28
  class CatboostModel(Model):
26
29
  """A class that uses Catboost as a model."""
27
30
 
31
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
32
+
28
33
  _catboost: CatBoost | None
29
34
  _iterations: None | int
30
35
  _learning_rate: None | float
@@ -51,10 +56,21 @@ class CatboostModel(Model):
51
56
  def estimator(self) -> Any:
52
57
  return self._provide_catboost()
53
58
 
54
- def pre_fit(self, y: pd.Series | pd.DataFrame | None):
59
+ def pre_fit(
60
+ self,
61
+ df: pd.DataFrame,
62
+ y: pd.Series | pd.DataFrame | None,
63
+ eval_x: pd.DataFrame | None = None,
64
+ eval_y: pd.Series | pd.DataFrame | None = None,
65
+ ):
55
66
  if y is None:
56
67
  raise ValueError("y is null.")
57
68
  self._model_type = determine_model_type(y)
69
+ return {
70
+ EVAL_SET_ARG_KEY: (eval_x, eval_y),
71
+ CAT_FEATURES_ARG_KEY: df.select_dtypes(include="category").columns.tolist(),
72
+ ORIGINAL_X_ARG_KEY: df,
73
+ }
58
74
 
59
75
  def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
60
76
  self._iterations = trial.suggest_int(_ITERATIONS_KEY, 100, 10000)
@@ -102,6 +118,8 @@ class CatboostModel(Model):
102
118
  df: pd.DataFrame,
103
119
  y: pd.Series | pd.DataFrame | None = None,
104
120
  w: pd.Series | None = None,
121
+ eval_x: pd.DataFrame | None = None,
122
+ eval_y: pd.Series | pd.DataFrame | None = None,
105
123
  ) -> Self:
106
124
  if y is None:
107
125
  raise ValueError("y is null.")
@@ -113,11 +131,16 @@ class CatboostModel(Model):
113
131
  label=y,
114
132
  weight=w,
115
133
  )
134
+ eval_pool = Pool(
135
+ eval_x,
136
+ label=eval_y,
137
+ )
116
138
  catboost.fit(
117
139
  train_pool,
118
140
  early_stopping_rounds=100,
119
141
  verbose=False,
120
142
  metric_period=100,
143
+ eval_set=eval_pool,
121
144
  )
122
145
  return self
123
146
 
@@ -142,7 +165,7 @@ class CatboostModel(Model):
142
165
  if catboost is None:
143
166
  match self._model_type:
144
167
  case ModelType.BINARY:
145
- catboost = CatBoostClassifier(
168
+ catboost = CatBoostClassifierWrapper(
146
169
  iterations=self._iterations,
147
170
  learning_rate=self._learning_rate,
148
171
  depth=self._depth,
@@ -152,7 +175,7 @@ class CatboostModel(Model):
152
175
  metric_period=100,
153
176
  )
154
177
  case ModelType.REGRESSION:
155
- catboost = CatBoostRegressor(
178
+ catboost = CatBoostRegressorWrapper(
156
179
  iterations=self._iterations,
157
180
  learning_rate=self._learning_rate,
158
181
  depth=self._depth,
@@ -162,7 +185,7 @@ class CatboostModel(Model):
162
185
  metric_period=100,
163
186
  )
164
187
  case ModelType.BINNED_BINARY:
165
- catboost = CatBoostClassifier(
188
+ catboost = CatBoostClassifierWrapper(
166
189
  iterations=self._iterations,
167
190
  learning_rate=self._learning_rate,
168
191
  depth=self._depth,
@@ -172,7 +195,7 @@ class CatboostModel(Model):
172
195
  metric_period=100,
173
196
  )
174
197
  case ModelType.MULTI_CLASSIFICATION:
175
- catboost = CatBoostClassifier(
198
+ catboost = CatBoostClassifierWrapper(
176
199
  iterations=self._iterations,
177
200
  learning_rate=self._learning_rate,
178
201
  depth=self._depth,
@@ -0,0 +1,13 @@
1
+ """A wrapper for catboost regressor to handle some edge cases."""
2
+
3
+ from catboost import CatBoostRegressor # type: ignore
4
+
5
+ from .catboost_kwargs import handle_fit_kwargs
6
+
7
+
8
+ class CatBoostRegressorWrapper(CatBoostRegressor):
9
+ """A wrapper for the catboost regressor."""
10
+
11
+ def fit(self, *args, **kwargs):
12
+ args, kwargs = handle_fit_kwargs(*args, **kwargs)
13
+ return super().fit(*args, **kwargs)
@@ -24,6 +24,12 @@ class Model(Params, Fit):
24
24
  """The estimator backing the model."""
25
25
  raise NotImplementedError("estimator not implemented in parent class.")
26
26
 
27
- def pre_fit(self, y: pd.Series | pd.DataFrame | None) -> None:
27
+ def pre_fit(
28
+ self,
29
+ df: pd.DataFrame,
30
+ y: pd.Series | pd.DataFrame | None,
31
+ eval_x: pd.DataFrame | None = None,
32
+ eval_y: pd.Series | pd.DataFrame | None = None,
33
+ ) -> dict[str, Any]:
28
34
  """A call to make sure the model is prepared for the target type."""
29
35
  raise NotImplementedError("pre_fit not implemented in parent class.")
@@ -20,6 +20,8 @@ _MODELS = {
20
20
  class ModelRouter(Model):
21
21
  """A router that routes to a different model class."""
22
22
 
23
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
24
+
23
25
  _model: Model | None
24
26
 
25
27
  def __init__(self) -> None:
@@ -37,11 +39,17 @@ class ModelRouter(Model):
37
39
  raise ValueError("model is null")
38
40
  return model.estimator
39
41
 
40
- def pre_fit(self, y: pd.Series | pd.DataFrame | None):
42
+ def pre_fit(
43
+ self,
44
+ df: pd.DataFrame,
45
+ y: pd.Series | pd.DataFrame | None,
46
+ eval_x: pd.DataFrame | None = None,
47
+ eval_y: pd.Series | pd.DataFrame | None = None,
48
+ ) -> dict[str, Any]:
41
49
  model = self._model
42
50
  if model is None:
43
51
  raise ValueError("model is null")
44
- model.pre_fit(y)
52
+ return model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y)
45
53
 
46
54
  def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
47
55
  self._model = _MODELS[
@@ -76,11 +84,13 @@ class ModelRouter(Model):
76
84
  df: pd.DataFrame,
77
85
  y: pd.Series | pd.DataFrame | None = None,
78
86
  w: pd.Series | None = None,
87
+ eval_x: pd.DataFrame | None = None,
88
+ eval_y: pd.Series | pd.DataFrame | None = None,
79
89
  ) -> Self:
80
90
  model = self._model
81
91
  if model is None:
82
92
  raise ValueError("model is null")
83
- model.fit(df, y=y, w=w)
93
+ model.fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y)
84
94
  return self
85
95
 
86
96
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -15,6 +15,8 @@ from .reducer import Reducer
15
15
  class BaseSelectorReducer(Reducer):
16
16
  """A class that uses the base selector from the feature engine."""
17
17
 
18
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
19
+
18
20
  def __init__(self, base_selector: BaseSelector, file_name: str) -> None:
19
21
  super().__init__()
20
22
  self._base_selector = base_selector
@@ -40,6 +42,8 @@ class BaseSelectorReducer(Reducer):
40
42
  df: pd.DataFrame,
41
43
  y: pd.Series | pd.DataFrame | None = None,
42
44
  w: pd.Series | None = None,
45
+ eval_x: pd.DataFrame | None = None,
46
+ eval_y: pd.Series | pd.DataFrame | None = None,
43
47
  ) -> Self:
44
48
  try:
45
49
  self._base_selector.fit(df) # type: ignore
@@ -20,6 +20,8 @@ _REDUCERS_KEY = "reducers"
20
20
  class CombinedReducer(Reducer):
21
21
  """A reducer that combines a series of reducers."""
22
22
 
23
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
24
+
23
25
  def __init__(self):
24
26
  super().__init__()
25
27
  self._reducers = [
@@ -73,6 +75,8 @@ class CombinedReducer(Reducer):
73
75
  df: pd.DataFrame,
74
76
  y: pd.Series | pd.DataFrame | None = None,
75
77
  w: pd.Series | None = None,
78
+ eval_x: pd.DataFrame | None = None,
79
+ eval_y: pd.Series | pd.DataFrame | None = None,
76
80
  ) -> Self:
77
81
  for reducer in self._reducers:
78
82
  df = reducer.fit_transform(df)
@@ -11,6 +11,8 @@ from .reducer import Reducer
11
11
  class NonNumericReducer(Reducer):
12
12
  """A class that removes non numeric columns from a dataframe."""
13
13
 
14
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
15
+
14
16
  @classmethod
15
17
  def name(cls) -> str:
16
18
  return "nonnumeric"
@@ -29,6 +31,8 @@ class NonNumericReducer(Reducer):
29
31
  df: pd.DataFrame,
30
32
  y: pd.Series | pd.DataFrame | None = None,
31
33
  w: pd.Series | None = None,
34
+ eval_x: pd.DataFrame | None = None,
35
+ eval_y: pd.Series | pd.DataFrame | None = None,
32
36
  ) -> Self:
33
37
  return self
34
38
 
@@ -7,6 +7,7 @@ from typing import Self
7
7
  import joblib # type: ignore
8
8
  import optuna
9
9
  import pandas as pd
10
+ import sklearn # type: ignore
10
11
  from sklearn.feature_selection import RFE # type: ignore
11
12
 
12
13
  from ..fit import Fit
@@ -19,6 +20,8 @@ _SELECTOR_FILE = "selector.joblib"
19
20
  class Selector(Params, Fit):
20
21
  """The selector class."""
21
22
 
23
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
24
+
22
25
  _selector: RFE | None
23
26
 
24
27
  def __init__(self, model: Model):
@@ -43,8 +46,11 @@ class Selector(Params, Fit):
43
46
  df: pd.DataFrame,
44
47
  y: pd.Series | pd.DataFrame | None = None,
45
48
  w: pd.Series | None = None,
49
+ eval_x: pd.DataFrame | None = None,
50
+ eval_y: pd.Series | pd.DataFrame | None = None,
46
51
  ) -> Self:
47
- self._model.pre_fit(y)
52
+ sklearn.set_config(enable_metadata_routing=False)
53
+ model_kwargs = self._model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y)
48
54
  if not isinstance(y, pd.Series):
49
55
  raise ValueError("y is not a series.")
50
56
  n_features_to_select = max(1, int(len(df.columns) * self._feature_ratio))
@@ -57,7 +63,7 @@ class Selector(Params, Fit):
57
63
  ),
58
64
  )
59
65
  try:
60
- self._selector.fit(df, y=y, sample_weight=w)
66
+ self._selector.fit(df, y=y, sample_weight=w, **model_kwargs)
61
67
  except ValueError as exc:
62
68
  # Catch issues with 1 feature as a reduction target.
63
69
  logging.warning(str(exc))
@@ -11,7 +11,7 @@ from typing import Self
11
11
  import optuna
12
12
  import pandas as pd
13
13
  import tqdm
14
- from sklearn.metrics import f1_score, mean_absolute_error # type: ignore
14
+ from sklearn.metrics import f1_score, r2_score # type: ignore
15
15
 
16
16
  from .calibrator.calibrator_router import CalibratorRouter
17
17
  from .exceptions import WavetrainException
@@ -158,6 +158,8 @@ class Trainer(Fit):
158
158
  df: pd.DataFrame,
159
159
  y: pd.Series | pd.DataFrame | None = None,
160
160
  w: pd.Series | None = None,
161
+ eval_x: pd.DataFrame | None = None,
162
+ eval_y: pd.Series | pd.DataFrame | None = None,
161
163
  ) -> Self:
162
164
  """Perform a train on the data to fit to the targets."""
163
165
  if y is None:
@@ -217,10 +219,12 @@ class Trainer(Fit):
217
219
  # Train
218
220
  selector = Selector(model)
219
221
  selector.set_options(trial)
220
- selector.fit(x_train, y=y_train, w=w)
222
+ selector.fit(x_train, y=y_train, w=w, eval_x=x_test, eval_y=y_test)
221
223
  x_train = selector.transform(x_train)
222
224
  x_test = selector.transform(x_test)
223
- x_pred = model.fit_transform(x_train, y=y_train)
225
+ x_pred = model.fit_transform(
226
+ x_train, y=y_train, w=w, eval_x=x_test, eval_y=y_test
227
+ )
224
228
 
225
229
  # Calibrate
226
230
  calibrator = CalibratorRouter(model)
@@ -243,8 +247,8 @@ class Trainer(Fit):
243
247
  y_pred = model.transform(x_test)
244
248
  y_pred = calibrator.transform(y_pred)
245
249
  if determine_model_type(y_series) == ModelType.REGRESSION:
246
- return mean_absolute_error(y_test, y_pred[[PREDICTION_COLUMN]])
247
- return f1_score(y_test, y_pred[[PREDICTION_COLUMN]])
250
+ return float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
251
+ return float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
248
252
  except WavetrainException as exc:
249
253
  logging.warning(str(exc))
250
254
  return -1.0
File without changes
@@ -14,6 +14,8 @@ from .weights import WEIGHTS_COLUMN, Weights
14
14
  class ClassWeights(Weights):
15
15
  """Class weight class."""
16
16
 
17
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
18
+
17
19
  _class_weights: dict[Any, float]
18
20
 
19
21
  def __init__(self) -> None:
@@ -39,6 +41,8 @@ class ClassWeights(Weights):
39
41
  df: pd.DataFrame,
40
42
  y: pd.Series | pd.DataFrame | None = None,
41
43
  w: pd.Series | None = None,
44
+ eval_x: pd.DataFrame | None = None,
45
+ eval_y: pd.Series | pd.DataFrame | None = None,
42
46
  ) -> Self:
43
47
  if not isinstance(y, pd.Series):
44
48
  raise ValueError("y is not a series.")
@@ -13,6 +13,8 @@ from .weights_router import WeightsRouter
13
13
  class CombinedWeights(Weights):
14
14
  """A weights class that combines multiple weights."""
15
15
 
16
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
17
+
16
18
  def __init__(self) -> None:
17
19
  super().__init__()
18
20
  self._weights = [WeightsRouter(), ClassWeights()]
@@ -38,6 +40,8 @@ class CombinedWeights(Weights):
38
40
  df: pd.DataFrame,
39
41
  y: pd.Series | pd.DataFrame | None = None,
40
42
  w: pd.Series | None = None,
43
+ eval_x: pd.DataFrame | None = None,
44
+ eval_y: pd.Series | pd.DataFrame | None = None,
41
45
  ) -> Self:
42
46
  for weights in self._weights:
43
47
  weights.fit(df, y=y)
@@ -12,6 +12,8 @@ from .weights import WEIGHTS_COLUMN, Weights
12
12
  class ExponentialWeights(Weights):
13
13
  """Exponential weight class."""
14
14
 
15
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
16
+
15
17
  @classmethod
16
18
  def name(cls) -> str:
17
19
  """The name of the weight class."""
@@ -31,6 +33,8 @@ class ExponentialWeights(Weights):
31
33
  df: pd.DataFrame,
32
34
  y: pd.Series | pd.DataFrame | None = None,
33
35
  w: pd.Series | None = None,
36
+ eval_x: pd.DataFrame | None = None,
37
+ eval_y: pd.Series | pd.DataFrame | None = None,
34
38
  ) -> Self:
35
39
  return self
36
40
 
@@ -12,7 +12,7 @@ from .weights import WEIGHTS_COLUMN, Weights
12
12
  class LinearWeights(Weights):
13
13
  """Linear weight class."""
14
14
 
15
- # pylint: disable=duplicate-code
15
+ # pylint: disable=duplicate-code,too-many-positional-arguments,too-many-arguments
16
16
 
17
17
  @classmethod
18
18
  def name(cls) -> str:
@@ -33,6 +33,8 @@ class LinearWeights(Weights):
33
33
  df: pd.DataFrame,
34
34
  y: pd.Series | pd.DataFrame | None = None,
35
35
  w: pd.Series | None = None,
36
+ eval_x: pd.DataFrame | None = None,
37
+ eval_y: pd.Series | pd.DataFrame | None = None,
36
38
  ) -> Self:
37
39
  return self
38
40
 
@@ -12,7 +12,7 @@ from .weights import WEIGHTS_COLUMN, Weights
12
12
  class NoopWeights(Weights):
13
13
  """Noop weight class."""
14
14
 
15
- # pylint: disable=duplicate-code
15
+ # pylint: disable=duplicate-code,too-many-positional-arguments,too-many-arguments
16
16
 
17
17
  @classmethod
18
18
  def name(cls) -> str:
@@ -33,6 +33,8 @@ class NoopWeights(Weights):
33
33
  df: pd.DataFrame,
34
34
  y: pd.Series | pd.DataFrame | None = None,
35
35
  w: pd.Series | None = None,
36
+ eval_x: pd.DataFrame | None = None,
37
+ eval_y: pd.Series | pd.DataFrame | None = None,
36
38
  ) -> Self:
37
39
  return self
38
40
 
@@ -13,7 +13,7 @@ from .weights import WEIGHTS_COLUMN, Weights
13
13
  class SigmoidWeights(Weights):
14
14
  """Sigmoid weight class."""
15
15
 
16
- # pylint: disable=duplicate-code
16
+ # pylint: disable=duplicate-code,too-many-positional-arguments,too-many-arguments
17
17
 
18
18
  @classmethod
19
19
  def name(cls) -> str:
@@ -34,6 +34,8 @@ class SigmoidWeights(Weights):
34
34
  df: pd.DataFrame,
35
35
  y: pd.Series | pd.DataFrame | None = None,
36
36
  w: pd.Series | None = None,
37
+ eval_x: pd.DataFrame | None = None,
38
+ eval_y: pd.Series | pd.DataFrame | None = None,
37
39
  ) -> Self:
38
40
  return self
39
41
 
@@ -26,6 +26,8 @@ _WEIGHTS = {
26
26
  class WeightsRouter(Weights):
27
27
  """A router that routes to a different weights class."""
28
28
 
29
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
30
+
29
31
  _weights: Weights | None
30
32
 
31
33
  def __init__(self) -> None:
@@ -71,6 +73,8 @@ class WeightsRouter(Weights):
71
73
  df: pd.DataFrame,
72
74
  y: pd.Series | pd.DataFrame | None = None,
73
75
  w: pd.Series | None = None,
76
+ eval_x: pd.DataFrame | None = None,
77
+ eval_y: pd.Series | pd.DataFrame | None = None,
74
78
  ) -> Self:
75
79
  return self
76
80
 
@@ -18,6 +18,8 @@ _LOOKBACK_KEY = "lookback"
18
18
  class Windower(Params, Fit):
19
19
  """The windower class."""
20
20
 
21
+ # pylint: disable=too-many-positional-arguments,too-many-arguments
22
+
21
23
  _lookback_ratio: float | None
22
24
 
23
25
  def __init__(self, dt_column: str | None):
@@ -48,6 +50,8 @@ class Windower(Params, Fit):
48
50
  df: pd.DataFrame,
49
51
  y: pd.Series | pd.DataFrame | None = None,
50
52
  w: pd.Series | None = None,
53
+ eval_x: pd.DataFrame | None = None,
54
+ eval_y: pd.Series | pd.DataFrame | None = None,
51
55
  ) -> Self:
52
56
  lookback_ratio = self._lookback_ratio
53
57
  if lookback_ratio is None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.5
3
+ Version: 0.0.7
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -21,7 +21,6 @@ Requires-Dist: scipy>=1.15.2
21
21
  Requires-Dist: catboost>=1.2.7
22
22
  Requires-Dist: venn-abers>=1.4.6
23
23
  Requires-Dist: mapie>=0.9.2
24
- Requires-Dist: shapiq>=1.2.2
25
24
 
26
25
  # wavetrainer
27
26
 
@@ -49,7 +48,6 @@ Python 3.11.6:
49
48
  - [catboost](https://catboost.ai/)
50
49
  - [venn-abers](https://github.com/ip200/venn-abers)
51
50
  - [mapie](https://mapie.readthedocs.io/en/stable/)
52
- - [shapiq](https://github.com/mmschlk/shapiq)
53
51
 
54
52
  ## Raison D'être :thought_balloon:
55
53
 
@@ -5,6 +5,8 @@ requirements.txt
5
5
  setup.py
6
6
  tests/__init__.py
7
7
  tests/trainer_test.py
8
+ tests/model/__init__.py
9
+ tests/model/catboost_kwargs_test.py
8
10
  wavetrainer/__init__.py
9
11
  wavetrainer/create.py
10
12
  wavetrainer/exceptions.py
@@ -24,7 +26,10 @@ wavetrainer/calibrator/calibrator_router.py
24
26
  wavetrainer/calibrator/mapie_calibrator.py
25
27
  wavetrainer/calibrator/vennabers_calibrator.py
26
28
  wavetrainer/model/__init__.py
29
+ wavetrainer/model/catboost_classifier_wrap.py
30
+ wavetrainer/model/catboost_kwargs.py
27
31
  wavetrainer/model/catboost_model.py
32
+ wavetrainer/model/catboost_regressor_wrap.py
28
33
  wavetrainer/model/model.py
29
34
  wavetrainer/model/model_router.py
30
35
  wavetrainer/reducer/__init__.py
@@ -8,4 +8,3 @@ scipy>=1.15.2
8
8
  catboost>=1.2.7
9
9
  venn-abers>=1.4.6
10
10
  mapie>=0.9.2
11
- shapiq>=1.2.2
File without changes
File without changes
File without changes