wavetrainer 0.0.26__tar.gz → 0.0.28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {wavetrainer-0.0.26/wavetrainer.egg-info → wavetrainer-0.0.28}/PKG-INFO +1 -1
  2. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/setup.py +1 -1
  3. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/__init__.py +1 -1
  4. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/calibrator/calibrator_router.py +9 -4
  5. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/calibrator/mapie_calibrator.py +4 -2
  6. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/calibrator/vennabers_calibrator.py +4 -2
  7. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/create.py +0 -2
  8. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/model/catboost_model.py +32 -12
  9. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/model/model.py +5 -0
  10. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/model/model_router.py +15 -5
  11. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/model/tabpfn_model.py +8 -2
  12. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/params.py +5 -2
  13. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/reducer/base_selector_reducer.py +4 -2
  14. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/reducer/combined_reducer.py +11 -10
  15. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/reducer/correlation_reducer.py +12 -1
  16. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/reducer/nonnumeric_reducer.py +4 -2
  17. wavetrainer-0.0.28/wavetrainer/reducer/smart_correlation_reducer.py +32 -0
  18. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/reducer/unseen_reducer.py +4 -2
  19. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/selector/selector.py +4 -2
  20. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/trainer.py +15 -19
  21. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/weights/class_weights.py +4 -2
  22. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/weights/combined_weights.py +6 -4
  23. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/weights/exponential_weights.py +4 -2
  24. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/weights/linear_weights.py +4 -2
  25. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/weights/noop_weights.py +4 -2
  26. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/weights/sigmoid_weights.py +4 -2
  27. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/weights/weights_router.py +5 -2
  28. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/windower/windower.py +4 -2
  29. {wavetrainer-0.0.26 → wavetrainer-0.0.28/wavetrainer.egg-info}/PKG-INFO +1 -1
  30. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer.egg-info/SOURCES.txt +1 -1
  31. wavetrainer-0.0.26/wavetrainer/reducer/pca_reducer.py +0 -77
  32. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/LICENSE +0 -0
  33. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/MANIFEST.in +0 -0
  34. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/README.md +0 -0
  35. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/requirements.txt +0 -0
  36. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/setup.cfg +0 -0
  37. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/tests/__init__.py +0 -0
  38. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/tests/model/__init__.py +0 -0
  39. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/tests/model/catboost_kwargs_test.py +0 -0
  40. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/tests/trainer_test.py +0 -0
  41. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/calibrator/__init__.py +0 -0
  42. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/calibrator/calibrator.py +0 -0
  43. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/exceptions.py +0 -0
  44. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/fit.py +0 -0
  45. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/model/__init__.py +0 -0
  46. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
  47. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/model/catboost_kwargs.py +0 -0
  48. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
  49. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/model_type.py +0 -0
  50. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/reducer/__init__.py +0 -0
  51. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/reducer/constant_reducer.py +0 -0
  52. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  53. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/reducer/reducer.py +0 -0
  54. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/selector/__init__.py +0 -0
  55. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/weights/__init__.py +0 -0
  56. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/weights/weights.py +0 -0
  57. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer/windower/__init__.py +0 -0
  58. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer.egg-info/dependency_links.txt +0 -0
  59. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer.egg-info/not-zip-safe +0 -0
  60. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer.egg-info/requires.txt +0 -0
  61. {wavetrainer-0.0.26 → wavetrainer-0.0.28}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.26
3
+ Version: 0.0.28
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
23
23
 
24
24
  setup(
25
25
  name='wavetrainer',
26
- version='0.0.26',
26
+ version='0.0.28',
27
27
  description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
28
  long_description=long_description,
29
29
  long_description_content_type='text/markdown',
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .create import create
4
4
 
5
- __VERSION__ = "0.0.26"
5
+ __VERSION__ = "0.0.28"
6
6
  __all__ = ("create",)
@@ -36,8 +36,13 @@ class CalibratorRouter(Calibrator):
36
36
  def name(cls) -> str:
37
37
  return "router"
38
38
 
39
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
40
- pass
39
+ def set_options(
40
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
41
+ ) -> None:
42
+ calibrator = self._calibrator
43
+ if calibrator is None:
44
+ return
45
+ calibrator.set_options(trial, df)
41
46
 
42
47
  def load(self, folder: str) -> None:
43
48
  with open(
@@ -48,11 +53,11 @@ class CalibratorRouter(Calibrator):
48
53
  calibrator.load(folder)
49
54
  self._calibrator = calibrator
50
55
 
51
- def save(self, folder: str) -> None:
56
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
52
57
  calibrator = self._calibrator
53
58
  if calibrator is None:
54
59
  raise ValueError("calibrator is null.")
55
- calibrator.save(folder)
60
+ calibrator.save(folder, trial)
56
61
  with open(
57
62
  os.path.join(folder, _CALIBRATOR_ROUTER_FILE), "w", encoding="utf8"
58
63
  ) as handle:
@@ -29,13 +29,15 @@ class MAPIECalibrator(Calibrator):
29
29
  def name(cls) -> str:
30
30
  return "mapie"
31
31
 
32
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
32
+ def set_options(
33
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
34
+ ) -> None:
33
35
  pass
34
36
 
35
37
  def load(self, folder: str) -> None:
36
38
  self._mapie = joblib.load(os.path.join(folder, _CALIBRATOR_FILENAME))
37
39
 
38
- def save(self, folder: str) -> None:
40
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
39
41
  joblib.dump(self._mapie, os.path.join(folder, _CALIBRATOR_FILENAME))
40
42
 
41
43
  def fit(
@@ -27,13 +27,15 @@ class VennabersCalibrator(Calibrator):
27
27
  def name(cls) -> str:
28
28
  return "vennabers"
29
29
 
30
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
30
+ def set_options(
31
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
32
+ ) -> None:
31
33
  pass
32
34
 
33
35
  def load(self, folder: str) -> None:
34
36
  self._vennabers = joblib.load(os.path.join(folder, _CALIBRATOR_FILENAME))
35
37
 
36
- def save(self, folder: str) -> None:
38
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
37
39
  joblib.dump(self._vennabers, os.path.join(folder, _CALIBRATOR_FILENAME))
38
40
 
39
41
  def fit(
@@ -15,7 +15,6 @@ def create(
15
15
  dt_column: str | None = None,
16
16
  max_train_timeout: datetime.timedelta | None = None,
17
17
  cutoff_dt: datetime.datetime | None = None,
18
- max_features: int | None = None,
19
18
  ) -> Trainer:
20
19
  """Create a trainer."""
21
20
  return Trainer(
@@ -26,5 +25,4 @@ def create(
26
25
  dt_column=dt_column,
27
26
  max_train_timeout=max_train_timeout,
28
27
  cutoff_dt=cutoff_dt,
29
- max_features=max_features,
30
28
  )
@@ -26,6 +26,7 @@ _L2_LEAF_REG_KEY = "l2_leaf_reg"
26
26
  _BOOSTING_TYPE_KEY = "boosting_type"
27
27
  _MODEL_TYPE_KEY = "model_type"
28
28
  _EARLY_STOPPING_ROUNDS = "early_stopping_rounds"
29
+ _BEST_ITERATION_KEY = "best_iteration"
29
30
 
30
31
 
31
32
  class CatboostModel(Model):
@@ -41,11 +42,16 @@ class CatboostModel(Model):
41
42
  _boosting_type: None | str
42
43
  _model_type: None | ModelType
43
44
  _early_stopping_rounds: None | int
45
+ _best_iteration: None | int
44
46
 
45
47
  @classmethod
46
48
  def name(cls) -> str:
47
49
  return "catboost"
48
50
 
51
+ @classmethod
52
+ def supports_x(cls, df: pd.DataFrame) -> bool:
53
+ return True
54
+
49
55
  def __init__(self) -> None:
50
56
  super().__init__()
51
57
  self._catboost = None
@@ -56,6 +62,7 @@ class CatboostModel(Model):
56
62
  self._boosting_type = None
57
63
  self._model_type = None
58
64
  self._early_stopping_rounds = None
65
+ self._best_iteration = None
59
66
 
60
67
  @property
61
68
  def estimator(self) -> Any:
@@ -83,7 +90,9 @@ class CatboostModel(Model):
83
90
  "sample_weight": w,
84
91
  }
85
92
 
86
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
93
+ def set_options(
94
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
95
+ ) -> None:
87
96
  self._iterations = trial.suggest_int(_ITERATIONS_KEY, 100, 10000)
88
97
  self._learning_rate = trial.suggest_float(_LEARNING_RATE_KEY, 0.001, 0.3)
89
98
  self._depth = trial.suggest_int(_DEPTH_KEY, 1, 10)
@@ -92,6 +101,7 @@ class CatboostModel(Model):
92
101
  _BOOSTING_TYPE_KEY, ["Ordered", "Plain"]
93
102
  )
94
103
  self._early_stopping_rounds = trial.suggest_int(_EARLY_STOPPING_ROUNDS, 10, 500)
104
+ self._best_iteration = trial.user_attrs.get(_BEST_ITERATION_KEY)
95
105
 
96
106
  def load(self, folder: str) -> None:
97
107
  with open(
@@ -105,10 +115,11 @@ class CatboostModel(Model):
105
115
  self._boosting_type = params[_BOOSTING_TYPE_KEY]
106
116
  self._model_type = ModelType(params[_MODEL_TYPE_KEY])
107
117
  self._early_stopping_rounds = params[_EARLY_STOPPING_ROUNDS]
118
+ self._best_iteration = params.get(_BEST_ITERATION_KEY)
108
119
  catboost = self._provide_catboost()
109
120
  catboost.load_model(os.path.join(folder, _MODEL_FILENAME))
110
121
 
111
- def save(self, folder: str) -> None:
122
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
112
123
  with open(
113
124
  os.path.join(folder, _MODEL_PARAMS_FILENAME), "w", encoding="utf8"
114
125
  ) as handle:
@@ -121,11 +132,13 @@ class CatboostModel(Model):
121
132
  _BOOSTING_TYPE_KEY: self._boosting_type,
122
133
  _MODEL_TYPE_KEY: str(self._model_type),
123
134
  _EARLY_STOPPING_ROUNDS: self._early_stopping_rounds,
135
+ _BEST_ITERATION_KEY: self._best_iteration,
124
136
  },
125
137
  handle,
126
138
  )
127
139
  catboost = self._provide_catboost()
128
140
  catboost.save_model(os.path.join(folder, _MODEL_FILENAME))
141
+ trial.user_attrs[_BEST_ITERATION_KEY] = self._best_iteration
129
142
 
130
143
  def fit(
131
144
  self,
@@ -137,8 +150,6 @@ class CatboostModel(Model):
137
150
  ) -> Self:
138
151
  if y is None:
139
152
  raise ValueError("y is null.")
140
- if eval_x is None:
141
- raise ValueError("eval_x is null.")
142
153
  self._model_type = determine_model_type(y)
143
154
  catboost = self._provide_catboost()
144
155
 
@@ -148,10 +159,14 @@ class CatboostModel(Model):
148
159
  weight=w,
149
160
  cat_features=df.select_dtypes(include="category").columns.tolist(),
150
161
  )
151
- eval_pool = Pool(
152
- eval_x,
153
- label=eval_y,
154
- cat_features=eval_x.select_dtypes(include="category").columns.tolist(),
162
+ eval_pool = (
163
+ Pool(
164
+ eval_x,
165
+ label=eval_y,
166
+ cat_features=eval_x.select_dtypes(include="category").columns.tolist(),
167
+ )
168
+ if eval_x is not None
169
+ else None
155
170
  )
156
171
  catboost.fit(
157
172
  train_pool,
@@ -162,6 +177,7 @@ class CatboostModel(Model):
162
177
  )
163
178
  importances = catboost.get_feature_importance(prettified=True)
164
179
  logging.info("Importances:\n%s", importances)
180
+ self._best_iteration = catboost.get_best_iteration()
165
181
  return self
166
182
 
167
183
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -186,10 +202,14 @@ class CatboostModel(Model):
186
202
  def _provide_catboost(self) -> CatBoost:
187
203
  catboost = self._catboost
188
204
  if catboost is None:
205
+ best_iteration = self._best_iteration
206
+ iterations = (
207
+ best_iteration if best_iteration is not None else self._iterations
208
+ )
189
209
  match self._model_type:
190
210
  case ModelType.BINARY:
191
211
  catboost = CatBoostClassifierWrapper(
192
- iterations=self._iterations,
212
+ iterations=iterations,
193
213
  learning_rate=self._learning_rate,
194
214
  depth=self._depth,
195
215
  l2_leaf_reg=self._l2_leaf_reg,
@@ -201,7 +221,7 @@ class CatboostModel(Model):
201
221
  )
202
222
  case ModelType.REGRESSION:
203
223
  catboost = CatBoostRegressorWrapper(
204
- iterations=self._iterations,
224
+ iterations=iterations,
205
225
  learning_rate=self._learning_rate,
206
226
  depth=self._depth,
207
227
  l2_leaf_reg=self._l2_leaf_reg,
@@ -213,7 +233,7 @@ class CatboostModel(Model):
213
233
  )
214
234
  case ModelType.BINNED_BINARY:
215
235
  catboost = CatBoostClassifierWrapper(
216
- iterations=self._iterations,
236
+ iterations=iterations,
217
237
  learning_rate=self._learning_rate,
218
238
  depth=self._depth,
219
239
  l2_leaf_reg=self._l2_leaf_reg,
@@ -225,7 +245,7 @@ class CatboostModel(Model):
225
245
  )
226
246
  case ModelType.MULTI_CLASSIFICATION:
227
247
  catboost = CatBoostClassifierWrapper(
228
- iterations=self._iterations,
248
+ iterations=iterations,
229
249
  learning_rate=self._learning_rate,
230
250
  depth=self._depth,
231
251
  l2_leaf_reg=self._l2_leaf_reg,
@@ -20,6 +20,11 @@ class Model(Params, Fit):
20
20
  """The name of the model."""
21
21
  raise NotImplementedError("name not implemented in parent class.")
22
22
 
23
+ @classmethod
24
+ def supports_x(cls, df: pd.DataFrame) -> bool:
25
+ """Whether the model supports the X values."""
26
+ raise NotImplementedError("supports_x not implemented in parent class.")
27
+
23
28
  @property
24
29
  def estimator(self) -> Any:
25
30
  """The estimator backing the model."""
@@ -34,6 +34,10 @@ class ModelRouter(Model):
34
34
  def name(cls) -> str:
35
35
  return "router"
36
36
 
37
+ @classmethod
38
+ def supports_x(cls, df: pd.DataFrame) -> bool:
39
+ return True
40
+
37
41
  @property
38
42
  def estimator(self) -> Any:
39
43
  model = self._model
@@ -61,9 +65,15 @@ class ModelRouter(Model):
61
65
  raise ValueError("model is null")
62
66
  return model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y, w=w)
63
67
 
64
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
65
- model = _MODELS[trial.suggest_categorical("model", list(_MODELS.keys()))]()
66
- model.set_options(trial)
68
+ def set_options(
69
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
70
+ ) -> None:
71
+ model = _MODELS[
72
+ trial.suggest_categorical(
73
+ "model", [k for k, v in _MODELS.items() if v.supports_x(df)]
74
+ )
75
+ ]()
76
+ model.set_options(trial, df)
67
77
  self._model = model
68
78
 
69
79
  def load(self, folder: str) -> None:
@@ -73,11 +83,11 @@ class ModelRouter(Model):
73
83
  model.load(folder)
74
84
  self._model = model
75
85
 
76
- def save(self, folder: str) -> None:
86
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
77
87
  model = self._model
78
88
  if model is None:
79
89
  raise ValueError("model is null")
80
- model.save(folder)
90
+ model.save(folder, trial)
81
91
  with open(
82
92
  os.path.join(folder, _MODEL_ROUTER_FILE), "w", encoding="utf8"
83
93
  ) as handle:
@@ -31,6 +31,10 @@ class TabPFNModel(Model):
31
31
  def name(cls) -> str:
32
32
  return "tabpfn"
33
33
 
34
+ @classmethod
35
+ def supports_x(cls, df: pd.DataFrame) -> bool:
36
+ return len(df.columns.values) < 500
37
+
34
38
  def __init__(self) -> None:
35
39
  super().__init__()
36
40
  self._tabpfn = None
@@ -57,7 +61,9 @@ class TabPFNModel(Model):
57
61
  self._model_type = determine_model_type(y)
58
62
  return {}
59
63
 
60
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
64
+ def set_options(
65
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
66
+ ) -> None:
61
67
  pass
62
68
 
63
69
  def load(self, folder: str) -> None:
@@ -69,7 +75,7 @@ class TabPFNModel(Model):
69
75
  params = json.load(handle)
70
76
  self._model_type = ModelType(params[_MODEL_TYPE_KEY])
71
77
 
72
- def save(self, folder: str) -> None:
78
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
73
79
  with open(os.path.join(folder, _MODEL_FILENAME), "wb") as f:
74
80
  pickle.dump(self._tabpfn, f)
75
81
  with open(
@@ -1,12 +1,15 @@
1
1
  """A class for loading/saving parameters."""
2
2
 
3
3
  import optuna
4
+ import pandas as pd
4
5
 
5
6
 
6
7
  class Params:
7
8
  """The params prototype class."""
8
9
 
9
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
10
+ def set_options(
11
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
12
+ ) -> None:
10
13
  """Set the options used in the object."""
11
14
  raise NotImplementedError("set_options not implemented in parent class.")
12
15
 
@@ -14,6 +17,6 @@ class Params:
14
17
  """Loads the objects from a folder."""
15
18
  raise NotImplementedError("load not implemented in parent class.")
16
19
 
17
- def save(self, folder: str) -> None:
20
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
18
21
  """Saves the objects into a folder."""
19
22
  raise NotImplementedError("save not implemented in parent class.")
@@ -32,14 +32,16 @@ class BaseSelectorReducer(Reducer):
32
32
  """Whether the class should raise its exception if it encounters it."""
33
33
  return True
34
34
 
35
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
35
+ def set_options(
36
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
37
+ ) -> None:
36
38
  pass
37
39
 
38
40
  def load(self, folder: str) -> None:
39
41
  file_path = os.path.join(folder, self._file_name)
40
42
  self._base_selector = joblib.load(file_path)
41
43
 
42
- def save(self, folder: str) -> None:
44
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
43
45
  file_path = os.path.join(folder, self._file_name)
44
46
  joblib.dump(self._base_selector, file_path)
45
47
 
@@ -12,8 +12,8 @@ from .constant_reducer import ConstantReducer
12
12
  from .correlation_reducer import CorrelationReducer
13
13
  from .duplicate_reducer import DuplicateReducer
14
14
  from .nonnumeric_reducer import NonNumericReducer
15
- from .pca_reducer import PCAReducer
16
15
  from .reducer import Reducer
16
+ from .smart_correlation_reducer import SmartCorrelationReducer
17
17
  from .unseen_reducer import UnseenReducer
18
18
 
19
19
  _COMBINED_REDUCER_FILE = "combined_reducer.json"
@@ -25,25 +25,26 @@ class CombinedReducer(Reducer):
25
25
 
26
26
  # pylint: disable=too-many-positional-arguments,too-many-arguments
27
27
 
28
- def __init__(self, max_features: int | None):
28
+ def __init__(self):
29
29
  super().__init__()
30
- self._max_features = max_features
31
30
  self._reducers = [
32
31
  UnseenReducer(),
33
32
  NonNumericReducer(),
34
33
  ConstantReducer(),
35
34
  DuplicateReducer(),
36
35
  CorrelationReducer(),
37
- PCAReducer(max_features),
36
+ SmartCorrelationReducer(),
38
37
  ]
39
38
 
40
39
  @classmethod
41
40
  def name(cls) -> str:
42
41
  return "combined"
43
42
 
44
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
43
+ def set_options(
44
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
45
+ ) -> None:
45
46
  for reducer in self._reducers:
46
- reducer.set_options(trial)
47
+ reducer.set_options(trial, df)
47
48
 
48
49
  def load(self, folder: str) -> None:
49
50
  self._reducers = []
@@ -62,12 +63,12 @@ class CombinedReducer(Reducer):
62
63
  self._reducers.append(NonNumericReducer())
63
64
  elif reducer_name == UnseenReducer.name():
64
65
  self._reducers.append(UnseenReducer())
65
- elif reducer_name == PCAReducer.name():
66
- self._reducers.append(PCAReducer(self._max_features))
66
+ elif reducer_name == SmartCorrelationReducer.name():
67
+ self._reducers.append(SmartCorrelationReducer())
67
68
  for reducer in self._reducers:
68
69
  reducer.load(folder)
69
70
 
70
- def save(self, folder: str) -> None:
71
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
71
72
  with open(
72
73
  os.path.join(folder, _COMBINED_REDUCER_FILE), "w", encoding="utf8"
73
74
  ) as handle:
@@ -78,7 +79,7 @@ class CombinedReducer(Reducer):
78
79
  handle,
79
80
  )
80
81
  for reducer in self._reducers:
81
- reducer.save(folder)
82
+ reducer.save(folder, trial)
82
83
 
83
84
  def fit(
84
85
  self,
@@ -1,18 +1,22 @@
1
1
  """A reducer that removes correlation features."""
2
2
 
3
+ import optuna
4
+ import pandas as pd
3
5
  from feature_engine.selection import DropCorrelatedFeatures
4
6
 
5
7
  from .base_selector_reducer import BaseSelectorReducer
6
8
 
7
9
  _CORRELATION_REDUCER_FILENAME = "correlation_reducer.joblib"
10
+ _CORRELATION_REDUCER_THRESHOLD = "correlation_reducer_threshold"
8
11
 
9
12
 
10
13
  class CorrelationReducer(BaseSelectorReducer):
11
14
  """A class that removes correlated values from a dataset."""
12
15
 
13
16
  def __init__(self) -> None:
17
+ self._correlation_selector = DropCorrelatedFeatures(missing_values="ignore")
14
18
  super().__init__(
15
- DropCorrelatedFeatures(missing_values="ignore"),
19
+ self._correlation_selector,
16
20
  _CORRELATION_REDUCER_FILENAME,
17
21
  )
18
22
 
@@ -23,3 +27,10 @@ class CorrelationReducer(BaseSelectorReducer):
23
27
  @classmethod
24
28
  def should_raise(cls) -> bool:
25
29
  return False
30
+
31
+ def set_options(
32
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
33
+ ) -> None:
34
+ self._correlation_selector.threshold = trial.suggest_float(
35
+ _CORRELATION_REDUCER_THRESHOLD, 0.1, 0.9
36
+ )
@@ -17,13 +17,15 @@ class NonNumericReducer(Reducer):
17
17
  def name(cls) -> str:
18
18
  return "nonnumeric"
19
19
 
20
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
20
+ def set_options(
21
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
22
+ ) -> None:
21
23
  pass
22
24
 
23
25
  def load(self, folder: str) -> None:
24
26
  pass
25
27
 
26
- def save(self, folder: str) -> None:
28
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
27
29
  pass
28
30
 
29
31
  def fit(
@@ -0,0 +1,32 @@
1
+ """A reducer that removes correlation features via further heuristics."""
2
+
3
+ import optuna
4
+ import pandas as pd
5
+ from feature_engine.selection import SmartCorrelatedSelection
6
+
7
+ from .base_selector_reducer import BaseSelectorReducer
8
+
9
+ _SMART_CORRELATION_REDUCER_FILENAME = "smart_correlation_reducer.joblib"
10
+ _SMART_CORRELATION_REDUCER_THRESHOLD = "smart_correlation_reducer_threshold"
11
+
12
+
13
+ class SmartCorrelationReducer(BaseSelectorReducer):
14
+ """A class that removes smart correlated values from a dataset."""
15
+
16
+ def __init__(self) -> None:
17
+ self._correlation_selector = SmartCorrelatedSelection(missing_values="ignore")
18
+ super().__init__(
19
+ self._correlation_selector,
20
+ _SMART_CORRELATION_REDUCER_FILENAME,
21
+ )
22
+
23
+ @classmethod
24
+ def name(cls) -> str:
25
+ return "smart_correlation"
26
+
27
+ def set_options(
28
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
29
+ ) -> None:
30
+ self._correlation_selector.threshold = trial.suggest_float(
31
+ _SMART_CORRELATION_REDUCER_THRESHOLD, 0.1, 0.9
32
+ )
@@ -25,7 +25,9 @@ class UnseenReducer(Reducer):
25
25
  def name(cls) -> str:
26
26
  return "unseen"
27
27
 
28
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
28
+ def set_options(
29
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
30
+ ) -> None:
29
31
  pass
30
32
 
31
33
  def load(self, folder: str) -> None:
@@ -34,7 +36,7 @@ class UnseenReducer(Reducer):
34
36
  ) as handle:
35
37
  self._seen_features = json.load(handle)
36
38
 
37
- def save(self, folder: str) -> None:
39
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
38
40
  with open(
39
41
  os.path.join(folder, _UNSEEN_REDUCER_FILE), "w", encoding="utf8"
40
42
  ) as handle:
@@ -31,14 +31,16 @@ class Selector(Params, Fit):
31
31
  self._steps = 0
32
32
  self._selector = None
33
33
 
34
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
34
+ def set_options(
35
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
36
+ ) -> None:
35
37
  self._feature_ratio = trial.suggest_float("feature_ratio", 0.0, 1.0)
36
38
  self._steps = trial.suggest_int("steps", 1, 10)
37
39
 
38
40
  def load(self, folder: str) -> None:
39
41
  self._selector = joblib.load(os.path.join(folder, _SELECTOR_FILE))
40
42
 
41
- def save(self, folder: str) -> None:
43
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
42
44
  joblib.dump(self._selector, os.path.join(folder, _SELECTOR_FILE))
43
45
 
44
46
  def fit(
@@ -36,7 +36,6 @@ _TEST_SIZE_KEY = "test_size"
36
36
  _VALIDATION_SIZE_KEY = "validation_size"
37
37
  _IDX_USR_ATTR_KEY = "idx"
38
38
  _DT_COLUMN_KEY = "dt_column"
39
- _MAX_FEATURES_KEY = "max_features"
40
39
 
41
40
 
42
41
  class Trainer(Fit):
@@ -54,7 +53,6 @@ class Trainer(Fit):
54
53
  dt_column: str | None = None,
55
54
  max_train_timeout: datetime.timedelta | None = None,
56
55
  cutoff_dt: datetime.datetime | None = None,
57
- max_features: int | None = None,
58
56
  ):
59
57
  tqdm.tqdm.pandas()
60
58
 
@@ -105,7 +103,6 @@ class Trainer(Fit):
105
103
  )
106
104
  if dt_column is None:
107
105
  dt_column = params[_DT_COLUMN_KEY]
108
- max_features = params.get(_MAX_FEATURES_KEY)
109
106
  else:
110
107
  with open(params_file, "w", encoding="utf8") as handle:
111
108
  validation_size_value = None
@@ -136,7 +133,6 @@ class Trainer(Fit):
136
133
  _TEST_SIZE_KEY: test_size_value,
137
134
  _VALIDATION_SIZE_KEY: validation_size_value,
138
135
  _DT_COLUMN_KEY: dt_column,
139
- _MAX_FEATURES_KEY: max_features,
140
136
  },
141
137
  handle,
142
138
  )
@@ -147,7 +143,6 @@ class Trainer(Fit):
147
143
  self._dt_column = dt_column
148
144
  self._max_train_timeout = max_train_timeout
149
145
  self._cutoff_dt = cutoff_dt
150
- self._max_features = max_features
151
146
 
152
147
  def _provide_study(self, column: str) -> optuna.Study:
153
148
  storage_name = f"sqlite:///{self._folder}/{column}/{_STUDYDB_FILENAME}"
@@ -213,7 +208,7 @@ class Trainer(Fit):
213
208
  try:
214
209
  # Window the data
215
210
  windower = Windower(self._dt_column)
216
- windower.set_options(trial)
211
+ windower.set_options(trial, x)
217
212
  x_train = windower.fit_transform(x_train)
218
213
  y_train = y_train[-len(x_train) :]
219
214
  if len(y_train.unique()) <= 1:
@@ -221,25 +216,25 @@ class Trainer(Fit):
221
216
  return -1.0
222
217
 
223
218
  # Perform common reductions
224
- reducer = CombinedReducer(self._max_features)
225
- reducer.set_options(trial)
219
+ reducer = CombinedReducer()
220
+ reducer.set_options(trial, x)
226
221
  x_train = reducer.fit_transform(x_train)
227
222
  x_test = reducer.transform(x_test)
228
223
 
229
224
  # Calculate the row weights
230
225
  weights = CombinedWeights()
231
- weights.set_options(trial)
226
+ weights.set_options(trial, x)
232
227
  w = weights.fit(x_train, y=y_train).transform(y_train.to_frame())[
233
228
  WEIGHTS_COLUMN
234
229
  ]
235
230
 
236
231
  # Create model
237
232
  model = ModelRouter()
238
- model.set_options(trial)
233
+ model.set_options(trial, x)
239
234
 
240
235
  # Train
241
236
  selector = Selector(model)
242
- selector.set_options(trial)
237
+ selector.set_options(trial, x)
243
238
  selector.fit(x_train, y=y_train, w=w, eval_x=x_test, eval_y=y_test)
244
239
  x_train = selector.transform(x_train)
245
240
  x_test = selector.transform(x_test)
@@ -249,7 +244,7 @@ class Trainer(Fit):
249
244
 
250
245
  # Calibrate
251
246
  calibrator = CalibratorRouter(model)
252
- calibrator.set_options(trial)
247
+ calibrator.set_options(trial, x)
253
248
  calibrator.fit(x_pred, y=y_train)
254
249
 
255
250
  if save:
@@ -258,12 +253,12 @@ class Trainer(Fit):
258
253
  )
259
254
  if not os.path.exists(folder):
260
255
  os.mkdir(folder)
261
- windower.save(folder)
262
- reducer.save(folder)
263
- weights.save(folder)
264
- model.save(folder)
265
- selector.save(folder)
266
- calibrator.save(folder)
256
+ windower.save(folder, trial)
257
+ reducer.save(folder, trial)
258
+ weights.save(folder, trial)
259
+ model.save(folder, trial)
260
+ selector.save(folder, trial)
261
+ calibrator.save(folder, trial)
267
262
 
268
263
  y_pred = model.transform(x_test)
269
264
  y_pred = calibrator.transform(y_pred)
@@ -380,6 +375,7 @@ class Trainer(Fit):
380
375
 
381
376
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
382
377
  """Predict the expected values of the data."""
378
+ tqdm.tqdm.pandas(desc="Inferring...")
383
379
  input_df = df.copy()
384
380
  df = df.reindex(sorted(df.columns), axis=1)
385
381
  feature_columns = df.columns.values
@@ -430,7 +426,7 @@ class Trainer(Fit):
430
426
  date_str = dates[-1].isoformat()
431
427
  folder = os.path.join(column_path, date_str)
432
428
 
433
- reducer = CombinedReducer(self._max_features)
429
+ reducer = CombinedReducer()
434
430
  reducer.load(folder)
435
431
 
436
432
  model = ModelRouter()
@@ -27,13 +27,15 @@ class ClassWeights(Weights):
27
27
  """The name of the weight class."""
28
28
  return "class"
29
29
 
30
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
30
+ def set_options(
31
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
32
+ ) -> None:
31
33
  pass
32
34
 
33
35
  def load(self, folder: str) -> None:
34
36
  pass
35
37
 
36
- def save(self, folder: str) -> None:
38
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
37
39
  pass
38
40
 
39
41
  def fit(
@@ -23,17 +23,19 @@ class CombinedWeights(Weights):
23
23
  def name(cls) -> str:
24
24
  return "combined"
25
25
 
26
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
26
+ def set_options(
27
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
28
+ ) -> None:
27
29
  for weights in self._weights:
28
- weights.set_options(trial)
30
+ weights.set_options(trial, df)
29
31
 
30
32
  def load(self, folder: str) -> None:
31
33
  for weights in self._weights:
32
34
  weights.load(folder)
33
35
 
34
- def save(self, folder: str) -> None:
36
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
35
37
  for weights in self._weights:
36
- weights.save(folder)
38
+ weights.save(folder, trial)
37
39
 
38
40
  def fit(
39
41
  self,
@@ -19,13 +19,15 @@ class ExponentialWeights(Weights):
19
19
  """The name of the weight class."""
20
20
  return "exponential"
21
21
 
22
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
22
+ def set_options(
23
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
24
+ ) -> None:
23
25
  pass
24
26
 
25
27
  def load(self, folder: str) -> None:
26
28
  pass
27
29
 
28
- def save(self, folder: str) -> None:
30
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
29
31
  pass
30
32
 
31
33
  def fit(
@@ -19,13 +19,15 @@ class LinearWeights(Weights):
19
19
  """The name of the weight class."""
20
20
  return "linear"
21
21
 
22
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
22
+ def set_options(
23
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
24
+ ) -> None:
23
25
  pass
24
26
 
25
27
  def load(self, folder: str) -> None:
26
28
  pass
27
29
 
28
- def save(self, folder: str) -> None:
30
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
29
31
  pass
30
32
 
31
33
  def fit(
@@ -19,13 +19,15 @@ class NoopWeights(Weights):
19
19
  """The name of the weight class."""
20
20
  return "noop"
21
21
 
22
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
22
+ def set_options(
23
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
24
+ ) -> None:
23
25
  pass
24
26
 
25
27
  def load(self, folder: str) -> None:
26
28
  pass
27
29
 
28
- def save(self, folder: str) -> None:
30
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
29
31
  pass
30
32
 
31
33
  def fit(
@@ -20,13 +20,15 @@ class SigmoidWeights(Weights):
20
20
  """The name of the weight class."""
21
21
  return "sigmoid"
22
22
 
23
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
23
+ def set_options(
24
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
25
+ ) -> None:
24
26
  pass
25
27
 
26
28
  def load(self, folder: str) -> None:
27
29
  pass
28
30
 
29
- def save(self, folder: str) -> None:
31
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
30
32
  pass
31
33
 
32
34
  def fit(
@@ -38,7 +38,9 @@ class WeightsRouter(Weights):
38
38
  def name(cls) -> str:
39
39
  return "router"
40
40
 
41
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
41
+ def set_options(
42
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
43
+ ) -> None:
42
44
  self._weights = _WEIGHTS[
43
45
  trial.suggest_categorical("weights", list(_WEIGHTS.keys()))
44
46
  ]()
@@ -54,10 +56,11 @@ class WeightsRouter(Weights):
54
56
  weights = _WEIGHTS[params[_WEIGHTS_KEY]]()
55
57
  self._weights = weights
56
58
 
57
- def save(self, folder: str) -> None:
59
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
58
60
  weights = self._weights
59
61
  if weights is None:
60
62
  raise ValueError("weights is null")
63
+ weights.save(folder, trial)
61
64
  with open(
62
65
  os.path.join(folder, _WEIGHTS_ROUTER_FILE), "w", encoding="utf8"
63
66
  ) as handle:
@@ -28,7 +28,9 @@ class Windower(Params, Fit):
28
28
  self._lookback_ratio = None
29
29
  self._dt_column = dt_column
30
30
 
31
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
31
+ def set_options(
32
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
33
+ ) -> None:
32
34
  self._lookback_ratio = trial.suggest_float("lookback", 0.1, 1.0)
33
35
 
34
36
  def load(self, folder: str) -> None:
@@ -36,7 +38,7 @@ class Windower(Params, Fit):
36
38
  params = json.load(handle)
37
39
  self._lookback = params[_LOOKBACK_KEY]
38
40
 
39
- def save(self, folder: str) -> None:
41
+ def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
40
42
  with open(os.path.join(folder, _WINDOWER_FILE), "w", encoding="utf8") as handle:
41
43
  json.dump(
42
44
  {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.26
3
+ Version: 0.0.28
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -40,8 +40,8 @@ wavetrainer/reducer/constant_reducer.py
40
40
  wavetrainer/reducer/correlation_reducer.py
41
41
  wavetrainer/reducer/duplicate_reducer.py
42
42
  wavetrainer/reducer/nonnumeric_reducer.py
43
- wavetrainer/reducer/pca_reducer.py
44
43
  wavetrainer/reducer/reducer.py
44
+ wavetrainer/reducer/smart_correlation_reducer.py
45
45
  wavetrainer/reducer/unseen_reducer.py
46
46
  wavetrainer/selector/__init__.py
47
47
  wavetrainer/selector/selector.py
@@ -1,77 +0,0 @@
1
- """A reducer that removes low variance columns."""
2
-
3
- import os
4
- from typing import Self
5
-
6
- import joblib # type: ignore
7
- import optuna
8
- import pandas as pd
9
- from sklearn.decomposition import PCA # type: ignore
10
- from sklearn.preprocessing import StandardScaler # type: ignore
11
-
12
- from .reducer import Reducer
13
-
14
- _PCA_FILE = "pca.joblib"
15
- _PCA_SCALER_FILE = "pca_scaler.joblib"
16
-
17
-
18
- class PCAReducer(Reducer):
19
- """A class that removes low variance columns from a dataframe."""
20
-
21
- # pylint: disable=too-many-positional-arguments,too-many-arguments
22
-
23
- def __init__(self, max_features: int | None):
24
- super().__init__()
25
- self._max_features = max_features
26
- if max_features is not None:
27
- self._scaler = StandardScaler()
28
- self._pca = PCA(n_components=max_features)
29
- else:
30
- self._scaler = None
31
- self._pca = None
32
-
33
- @classmethod
34
- def name(cls) -> str:
35
- return "pca"
36
-
37
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
38
- pass
39
-
40
- def load(self, folder: str) -> None:
41
- pca_scaler_file = os.path.join(folder, _PCA_SCALER_FILE)
42
- pca_file = os.path.join(folder, _PCA_FILE)
43
- if os.path.exists(pca_scaler_file):
44
- self._scaler = joblib.load(pca_scaler_file)
45
- if os.path.exists(pca_file):
46
- self._pca = joblib.load(pca_file)
47
-
48
- def save(self, folder: str) -> None:
49
- if self._scaler is not None:
50
- joblib.dump(self._scaler, os.path.join(folder, _PCA_SCALER_FILE))
51
- if self._pca is not None:
52
- joblib.dump(self._pca, os.path.join(folder, _PCA_FILE))
53
-
54
- def fit(
55
- self,
56
- df: pd.DataFrame,
57
- y: pd.Series | pd.DataFrame | None = None,
58
- w: pd.Series | None = None,
59
- eval_x: pd.DataFrame | None = None,
60
- eval_y: pd.Series | pd.DataFrame | None = None,
61
- ) -> Self:
62
- pca = self._pca
63
- scaler = self._scaler
64
- if pca is None or scaler is None:
65
- return self
66
- if len(df.columns.values) < pca.n_components: # type: ignore
67
- return self
68
- x_scaled = scaler.fit_transform(df)
69
- pca.fit(x_scaled)
70
- return self
71
-
72
- def transform(self, df: pd.DataFrame) -> pd.DataFrame:
73
- if self._pca is None:
74
- return df
75
- if len(df.columns.values) < self._pca.n_components: # type: ignore
76
- return df
77
- return self._pca.transform(df)
File without changes
File without changes
File without changes
File without changes