wavetrainer-0.0.43.tar.gz → wavetrainer-0.0.44.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. {wavetrainer-0.0.43/wavetrainer.egg-info → wavetrainer-0.0.44}/PKG-INFO +1 -1
  2. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/setup.py +1 -1
  3. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/tests/trainer_test.py +1 -1
  4. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/__init__.py +1 -1
  5. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/calibrator/calibrator_router.py +3 -0
  6. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/calibrator/mapie_calibrator.py +9 -6
  7. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_kwargs.py +10 -7
  8. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_model.py +11 -4
  9. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/model.py +8 -0
  10. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/model_router.py +12 -0
  11. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/tabpfn/tabpfn_model.py +6 -0
  12. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/xgboost/xgboost_model.py +16 -3
  13. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/base_selector_reducer.py +0 -3
  14. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/combined_reducer.py +3 -2
  15. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/correlation_reducer.py +1 -1
  16. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/smart_correlation_reducer.py +6 -1
  17. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/selector/selector.py +8 -2
  18. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/trainer.py +4 -2
  19. {wavetrainer-0.0.43 → wavetrainer-0.0.44/wavetrainer.egg-info}/PKG-INFO +1 -1
  20. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/LICENSE +0 -0
  21. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/MANIFEST.in +0 -0
  22. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/README.md +0 -0
  23. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/requirements.txt +0 -0
  24. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/setup.cfg +0 -0
  25. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/tests/__init__.py +0 -0
  26. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/tests/model/__init__.py +0 -0
  27. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/tests/model/catboost_kwargs_test.py +0 -0
  28. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/calibrator/__init__.py +0 -0
  29. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/calibrator/calibrator.py +0 -0
  30. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
  31. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/create.py +0 -0
  32. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/exceptions.py +0 -0
  33. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/fit.py +0 -0
  34. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/__init__.py +0 -0
  35. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/catboost/__init__.py +0 -0
  36. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_classifier_wrap.py +0 -0
  37. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_regressor_wrap.py +0 -0
  38. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/tabpfn/__init__.py +0 -0
  39. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/xgboost/__init__.py +0 -0
  40. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/xgboost/early_stopper.py +0 -0
  41. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/xgboost/xgboost_logger.py +0 -0
  42. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model_type.py +0 -0
  43. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/params.py +0 -0
  44. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/__init__.py +0 -0
  45. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/constant_reducer.py +0 -0
  46. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  47. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
  48. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
  49. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/reducer.py +0 -0
  50. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +0 -0
  51. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/unseen_reducer.py +0 -0
  52. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/selector/__init__.py +0 -0
  53. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/__init__.py +0 -0
  54. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/class_weights.py +0 -0
  55. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/combined_weights.py +0 -0
  56. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/exponential_weights.py +0 -0
  57. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/linear_weights.py +0 -0
  58. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/noop_weights.py +0 -0
  59. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/sigmoid_weights.py +0 -0
  60. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/weights.py +0 -0
  61. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/weights_router.py +0 -0
  62. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/windower/__init__.py +0 -0
  63. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/windower/windower.py +0 -0
  64. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer.egg-info/SOURCES.txt +0 -0
  65. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer.egg-info/dependency_links.txt +0 -0
  66. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer.egg-info/not-zip-safe +0 -0
  67. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer.egg-info/requires.txt +0 -0
  68. {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer.egg-info/top_level.txt +0 -0
{wavetrainer-0.0.43/wavetrainer.egg-info → wavetrainer-0.0.44}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.0.43
+Version: 0.0.44
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/setup.py
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:

 setup(
     name='wavetrainer',
-    version='0.0.43',
+    version='0.0.44',
     description='A library for automatically finding the optimal model within feature and hyperparameter space.',
     long_description=long_description,
     long_description_content_type='text/markdown',
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/tests/trainer_test.py
@@ -40,7 +40,7 @@ class TestTrainer(unittest.TestCase):

     def test_trainer_dt_column(self):
         with tempfile.TemporaryDirectory() as tmpdir:
-            trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=1, dt_column="dt_column")
+            trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=5, dt_column="dt_column")
             x_data = [i for i in range(100)]
             x_index = [datetime.datetime(2022, 1, 1) + datetime.timedelta(days=i) for i in range(len(x_data))]
             df = pd.DataFrame(
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/__init__.py
@@ -2,5 +2,5 @@

 from .create import create

-__VERSION__ = "0.0.43"
+__VERSION__ = "0.0.44"
 __all__ = ("create",)
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/calibrator/calibrator_router.py
@@ -37,6 +37,9 @@ class CalibratorRouter(Calibrator):
         return "router"

     def predictions_as_x(self, y: pd.Series | pd.DataFrame | None = None) -> bool:
+        calibrator = self._calibrator
+        if calibrator is not None:
+            return calibrator.predictions_as_x(None)
         if y is None:
             raise ValueError("y is null")
         if determine_model_type(y) == ModelType.REGRESSION:
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/calibrator/mapie_calibrator.py
@@ -21,7 +21,7 @@ class MAPIECalibrator(Calibrator):

     def __init__(self, model: Model):
         super().__init__(model)
-        self._mapie = MapieRegressor(model.create_estimator(), method="plus")
+        self._mapie = MapieRegressor(model.create_estimator(), method="plus", cv=5)

     @classmethod
     def name(cls) -> str:
@@ -54,19 +54,22 @@ class MAPIECalibrator(Calibrator):
             raise ValueError("mapie is null")
         if y is None:
             raise ValueError("y is null")
-        if len(df) <= 5:
-            return self
-        mapie.fit(df.to_numpy(), y.to_numpy())
+        mapie.fit(self._model.convert_df(df), y)
         return self

     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
         alpha = []
         for potential_alpha in [0.05, 0.32]:
-            if len(df) > int(1.0 / potential_alpha) + 1:
+            if (
+                len(df) > int(1.0 / potential_alpha) + 1
+                and len(df) > int(1.0 / (1.0 - potential_alpha)) + 1
+            ):
                 alpha.append(potential_alpha)
         ret_df = pd.DataFrame(index=df.index)
         if alpha:
-            _, y_pis = self._mapie.predict(df, alpha=alpha)
+            _, y_pis = self._mapie.predict(
+                self._model.convert_df(df), alpha=alpha, allow_infinite_bounds=True
+            )
             for i in range(y_pis.shape[1]):
                 if i >= len(alpha):
                     continue
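Note on the two MAPIE changes above: cv=5 pins the cross-validation scheme explicitly, and the tightened alpha guard checks both tails, since an interval at level alpha needs enough calibration residuals to estimate both the alpha and 1-alpha quantiles. A minimal, standalone sketch of the calls involved, assuming the pre-1.0 mapie API (where MapieRegressor and the allow_infinite_bounds predict flag exist); the data is synthetic:

# Sketch of the calibrator's MAPIE usage on toy data.
import numpy as np
from mapie.regression import MapieRegressor
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + rng.normal(scale=0.1, size=100)

mapie = MapieRegressor(LinearRegression(), method="plus", cv=5)  # cv=5 as in the diff
mapie.fit(X, y)

# y_pis has shape (n_samples, 2, n_alpha): lower and upper bound per alpha level.
y_pred, y_pis = mapie.predict(X, alpha=[0.05, 0.32], allow_infinite_bounds=True)
print(y_pred.shape, y_pis.shape)  # (100,) (100, 2, 2)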
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_kwargs.py
@@ -3,6 +3,7 @@
 from typing import Any

 import numpy as np
+import pandas as pd
 from catboost import Pool  # type: ignore

 ORIGINAL_X_ARG_KEY = "original_x"
@@ -12,12 +13,17 @@ CAT_FEATURES_ARG_KEY = "cat_features"

 def handle_fit_kwargs(*args, **kwargs) -> tuple[tuple[Any, ...], dict[str, Any]]:
     """Handles keyword args coming into a catboost fit method."""
+    args_list = list(args)
+    fit_x = args_list[0]
+
+    cat_features = kwargs.get(CAT_FEATURES_ARG_KEY)
+    if cat_features is None and isinstance(fit_x, pd.DataFrame):
+        cat_features = fit_x.select_dtypes(include="category").columns.tolist()
+    kwargs[CAT_FEATURES_ARG_KEY] = cat_features
+
     if ORIGINAL_X_ARG_KEY in kwargs:
         df = kwargs[ORIGINAL_X_ARG_KEY]
         eval_x, eval_y = kwargs[EVAL_SET_ARG_KEY]
-        cat_features = kwargs[CAT_FEATURES_ARG_KEY]
-        args_list = list(args)
-        fit_x = args_list[0]
         fix_x_cp = fit_x.copy()

         # Stupid code to ensure eval is feature equivalent to train data
@@ -32,9 +38,6 @@ def handle_fit_kwargs(*args, **kwargs) -> tuple[tuple[Any, ...], dict[str, Any]]
                 included_columns.append(col)
                 break
         # We also need to update cat_features or catboost will yell at us
-        cat_features = list(
-            set(list(kwargs.get(CAT_FEATURES_ARG_KEY, []))) & set(included_columns)
-        )
         args_list[0] = df[included_columns]
         args = tuple(args_list)

@@ -45,7 +48,7 @@ def handle_fit_kwargs(*args, **kwargs) -> tuple[tuple[Any, ...], dict[str, Any]]
             label=eval_y,
             cat_features=cat_features,
         )
-        kwargs[CAT_FEATURES_ARG_KEY] = cat_features

         del kwargs[ORIGINAL_X_ARG_KEY]
+
     return args, kwargs
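The hoisted block above now infers cat_features from the frame's dtypes whenever the caller does not pass them explicitly, rather than requiring the kwarg. A minimal sketch of that inference on a toy frame (the column names are illustrative, not from the package):

# Pandas "category" columns become CatBoost categorical features by default.
import pandas as pd

fit_x = pd.DataFrame(
    {
        "team": pd.Series(["a", "b", "a"], dtype="category"),
        "score": [0.1, 0.7, 0.4],
    }
)

cat_features = fit_x.select_dtypes(include="category").columns.tolist()
print(cat_features)  # ['team']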
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_model.py
@@ -2,12 +2,12 @@

 # pylint: disable=line-too-long
 import json
-import logging
 import os
 from typing import Self

 import optuna
 import pandas as pd
+import pytest_is_running
 import torch
 from catboost import CatBoost, Pool  # type: ignore

@@ -83,12 +83,21 @@ class CatboostModel(Model):
     def create_estimator(self):
         return self._create_catboost()

+    def reset(self):
+        self._catboost = None
+        self._best_iteration = None
+
+    def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
+        return df
+
     def set_options(
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
     ) -> None:
         self._iterations = trial.suggest_int(_ITERATIONS_KEY, 100, 10000)
         self._learning_rate = trial.suggest_float(_LEARNING_RATE_KEY, 0.001, 0.3)
-        self._depth = trial.suggest_int(_DEPTH_KEY, 1, 6)
+        self._depth = trial.suggest_int(
+            _DEPTH_KEY, 1, 2 if pytest_is_running.is_running() else 6
+        )
         self._l2_leaf_reg = trial.suggest_float(_L2_LEAF_REG_KEY, 3.0, 50.0)
         self._boosting_type = trial.suggest_categorical(
             _BOOSTING_TYPE_KEY, ["Ordered", "Plain"]
@@ -170,8 +179,6 @@ class CatboostModel(Model):
             metric_period=100,
             eval_set=eval_pool,
         )
-        importances = catboost.get_feature_importance(prettified=True)
-        logging.info("Importances:\n%s", importances)
         self._best_iteration = catboost.get_best_iteration()
         return self
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/model.py
@@ -46,3 +46,11 @@ class Model(Params, Fit):
     def create_estimator(self) -> Any:
         """Creates a new estimator."""
         raise NotImplementedError("creates_estimator not implemented in parent class.")
+
+    def reset(self) -> None:
+        """Resets a model."""
+        raise NotImplementedError("reset not implemented in parent class.")
+
+    def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Converts a dataframe for use with a model."""
+        raise NotImplementedError("convert_df not implemented in parent class.")
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/model_router.py
@@ -66,6 +66,18 @@ class ModelRouter(Model):
             raise ValueError("model is null")
         return model.create_estimator()

+    def reset(self):
+        model = self._model
+        if model is None:
+            raise ValueError("model is null")
+        model.reset()
+
+    def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
+        model = self._model
+        if model is None:
+            raise ValueError("model is null")
+        return model.convert_df(df)
+
     def set_options(
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
     ) -> None:
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/tabpfn/tabpfn_model.py
@@ -56,6 +56,12 @@ class TabPFNModel(Model):
     def create_estimator(self):
         return self._create_tabpfn()

+    def reset(self):
+        pass
+
+    def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
+        return df
+
     def set_options(
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
     ) -> None:
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/xgboost/xgboost_model.py
@@ -7,6 +7,7 @@ from typing import Self

 import optuna
 import pandas as pd
+import pytest_is_running
 import torch
 from xgboost import XGBClassifier, XGBRegressor
 from xgboost.callback import TrainingCallback
@@ -119,6 +120,13 @@ class XGBoostModel(Model):
     def create_estimator(self):
         return self._create_xgboost()

+    def reset(self):
+        self._xgboost = None
+        self._best_iteration = None
+
+    def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
+        return _convert_categoricals(df)
+
     def set_options(
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
     ) -> None:
@@ -130,7 +138,9 @@ class XGBoostModel(Model):
         self._subsample = trial.suggest_float(_SUBSAMPLE_KEY, 0.2, 1.0)
         self._colsample_bytree = trial.suggest_float(_COLSAMPLE_BYTREE_KEY, 0.2, 1.0)
         if self._booster in ["gbtree", "dart"]:
-            self._max_depth = trial.suggest_int(_MAX_DEPTH_KEY, 3, 9)
+            self._max_depth = trial.suggest_int(
+                _MAX_DEPTH_KEY, 3, 4 if pytest_is_running.is_running() else 9
+            )
             self._min_child_weight = trial.suggest_int(
                 _MIN_CHILD_WEIGHT_KEY, 2, 10, log=True
             )
@@ -148,7 +158,9 @@ class XGBoostModel(Model):
             )
             self._rate_drop = trial.suggest_float(_RATE_DROP_KEY, 1e-8, 1.0, log=True)
             self._skip_drop = trial.suggest_float(_SKIP_DROP_KEY, 1e-8, 1.0, log=True)
-        self._num_boost_rounds = trial.suggest_int(_NUM_BOOST_ROUNDS_KEY, 100, 10000)
+        self._num_boost_rounds = trial.suggest_int(
+            _NUM_BOOST_ROUNDS_KEY, 100, 110 if pytest_is_running.is_running() else 10000
+        )
         self._early_stopping_rounds = trial.suggest_int(
             _EARLY_STOPPING_ROUNDS_KEY, 50, 500
         )
@@ -279,7 +291,8 @@ class XGBoostModel(Model):
         )
         param = {
             "objective": "binary:logistic",
-            "tree_method": "gpu_hist" if torch.cuda.is_available() else "exact",
+            "tree_method": "hist" if torch.cuda.is_available() else "exact",
+            "device": "cuda" if torch.cuda.is_available() else "cpu",
             # defines booster, gblinear for linear functions.
             "booster": self._booster,
             # L2 regularization weight.
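The last hunk adopts the XGBoost 2.x GPU configuration: "gpu_hist" is deprecated there in favor of tree_method="hist" combined with a device parameter. A minimal sketch of that style on synthetic data (run with device="cuda" only when a GPU is actually present):

# XGBoost 2.x-style device selection, mirroring the torch.cuda check in the diff.
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(128, 4))
y = (X[:, 0] > 0).astype(int)

dtrain = xgb.DMatrix(X, label=y)
param = {
    "objective": "binary:logistic",
    "tree_method": "hist",
    "device": "cpu",  # "cuda" on GPU machines
}
booster = xgb.train(param, dtrain, num_boost_round=10)
print(booster.predict(dtrain)[:3])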
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/base_selector_reducer.py
@@ -7,7 +7,6 @@ import joblib  # type: ignore
 import optuna
 import pandas as pd
 from feature_engine.selection.base_selector import BaseSelector
-from sklearn.utils.validation import check_is_fitted  # type: ignore

 from .reducer import Reducer

@@ -60,6 +59,4 @@ class BaseSelectorReducer(Reducer):
     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
         if len(df.columns) <= 1:
             return df
-        if not check_is_fitted(self._base_selector):
-            return df
         return self._base_selector.transform(df)
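The deleted guard was likely wrong rather than merely redundant: sklearn's check_is_fitted returns None on success and raises NotFittedError otherwise, so `not check_is_fitted(...)` evaluated to True even for a fitted selector and transform() always returned the frame untouched. A quick demonstration:

# Why `if not check_is_fitted(est)` misbehaves: the helper returns None
# (falsy) when fitted and raises NotFittedError when not.
from sklearn.linear_model import LinearRegression
from sklearn.utils.validation import check_is_fitted

est = LinearRegression().fit([[0.0], [1.0]], [0.0, 1.0])
print(check_is_fitted(est))      # None
print(not check_is_fitted(est))  # True -> the old early return always fired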
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/combined_reducer.py
@@ -1,5 +1,6 @@
 """A reducer that combines all the other reducers."""

+# pylint: disable=line-too-long
 import json
 import logging
 import os
@@ -107,8 +108,8 @@ class CombinedReducer(Reducer):
             removed_columns = before_columns.difference(after_columns)
             if removed_columns:
                 removed_columns_dict[reducer.name()] = list(removed_columns)
-            logging.info(
-                "%s reducer took %f", reducer.name(), time.time() - start_reducer
+            print(
+                f"{reducer.name()} reducer took {time.time() - start_reducer} and removed {len(removed_columns)} features",
             )
             if self._folder is not None:
                 with open(
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/correlation_reducer.py
@@ -37,7 +37,7 @@ class CorrelationReducer(BaseSelectorReducer):
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
     ) -> None:
         self._correlation_selector.threshold = trial.suggest_float(
-            _CORRELATION_REDUCER_THRESHOLD, 0.1, 0.9
+            _CORRELATION_REDUCER_THRESHOLD, 0.7, 0.99
        )

     def fit(
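Raising the searchable threshold range from 0.1-0.9 to 0.7-0.99 (here and in the smart variant below) means only strongly correlated pairs can be pruned; at the old lower bound of 0.1, columns that were barely related could be discarded. A sketch of the effect using feature_engine, on synthetic data; DropCorrelatedFeatures stands in for whichever selector the reducer actually wraps:

# With a high threshold, only near-duplicate columns are dropped.
import numpy as np
import pandas as pd
from feature_engine.selection import DropCorrelatedFeatures

rng = np.random.default_rng(0)
a = rng.normal(size=200)
df = pd.DataFrame(
    {
        "a": a,
        "near_dup": a + rng.normal(scale=0.01, size=200),  # corr ~ 1.0 with "a"
        "noise": rng.normal(size=200),                     # uncorrelated
    }
)

selector = DropCorrelatedFeatures(threshold=0.95)
print(selector.fit_transform(df).columns.tolist())  # ['a', 'noise']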
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/smart_correlation_reducer.py
@@ -33,7 +33,7 @@ class SmartCorrelationReducer(BaseSelectorReducer):
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
     ) -> None:
         self._correlation_selector.threshold = trial.suggest_float(
-            _SMART_CORRELATION_REDUCER_THRESHOLD, 0.1, 0.9
+            _SMART_CORRELATION_REDUCER_THRESHOLD, 0.7, 0.99
         )

     def fit(
@@ -48,3 +48,8 @@ class SmartCorrelationReducer(BaseSelectorReducer):
         if len(self._correlation_selector.variables) <= 1:
             return self
         return super().fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y)
+
+    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
+        if len(find_non_categorical_numeric_columns(df)) <= 1:
+            return df
+        return super().transform(df)
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/selector/selector.py
@@ -57,7 +57,8 @@ class Selector(Params, Fit):
             return self
         if not isinstance(y, pd.Series):
             raise ValueError("y is not a series.")
-        if len(df.columns) <= 1:
+        total_columns = len(df.columns)
+        if total_columns <= 1:
             return self
         print(
             f"Performing feature selection with {self._steps} steps and a total ratio of {self._feature_ratio}"
@@ -81,6 +82,9 @@ class Selector(Params, Fit):
             if not current_features:
                 current_features = [list(feature_importances.keys())[0]]
             current_features = current_features[:required_features]
+            print(
+                f"Current Features:\n{pd.Series(data=list(feature_importances.values()), index=list(feature_importances.keys()))}\n"
+            )

         n_features = len(current_features)
         for i in range(self._steps):
@@ -90,16 +94,18 @@ class Selector(Params, Fit):
             ratio_diff = 1.0 - self._feature_ratio
             ratio_step = ratio_diff / float(self._steps)
             current_ratio = 1.0 - (ratio_step * i)
-            n_features = max(1, int(len(df.columns) * current_ratio))
+            n_features = max(1, int(total_columns * current_ratio))
             if n_features >= len(current_features):
                 continue

+            self._model.reset()
             self._model.fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y)
             set_current_features(n_features)
             print(f"Reduced features to {len(current_features)}")
             df = df[current_features]
             if eval_x is not None:
                 eval_x = eval_x[current_features]
+        print(f"Final feature count: {len(current_features)}")

         self._selector = current_features
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/trainer.py
@@ -212,7 +212,7 @@ class Trainer(Fit):
                 folder = os.path.join(
                     self._folder, str(y_series.name), split_idx.isoformat()
                 )
-                new_folder = os.path.exists(folder)
+                new_folder = not os.path.exists(folder)
                 os.makedirs(folder, exist_ok=True)
                 trial_file = os.path.join(folder, _TRIAL_FILENAME)
                 if os.path.exists(trial_file):
@@ -517,7 +517,9 @@ class Trainer(Fit):
             x_pred = reducer.transform(group[feature_columns])
             x_pred = selector.transform(x_pred)
             y_pred = model.transform(x_pred)
-            y_pred = calibrator.transform(y_pred)
+            y_pred = calibrator.transform(
+                y_pred if calibrator.predictions_as_x(None) else x_pred
+            )
             for new_column in y_pred.columns.values:
                 group["_".join([column, new_column])] = y_pred[new_column]
             return group
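Two distinct fixes here: new_folder previously recorded that the folder already existed (the boolean was inverted), and inference now feeds the calibrator the same kind of input it was fitted on, predictions for output-space calibrators and the reduced feature matrix for interval-style ones. A toy sketch of the routing; StubCalibrator is a stand-in, not the package's Calibrator class:

# Toy version of the calibrator input routing fixed above.
import pandas as pd

class StubCalibrator:
    def predictions_as_x(self, y=None) -> bool:
        return True  # e.g. a Venn-ABERS calibrator consumes model predictions

def calibrator_input(
    calibrator: StubCalibrator, x_pred: pd.DataFrame, y_pred: pd.DataFrame
) -> pd.DataFrame:
    # MAPIE-style calibrators want features; probability calibrators want predictions.
    return y_pred if calibrator.predictions_as_x(None) else x_pred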
{wavetrainer-0.0.43 → wavetrainer-0.0.44/wavetrainer.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.0.43
+Version: 0.0.44
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield