wavetrainer 0.0.27__tar.gz → 0.0.29__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. {wavetrainer-0.0.27/wavetrainer.egg-info → wavetrainer-0.0.29}/PKG-INFO +1 -1
  2. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/setup.py +1 -1
  3. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/__init__.py +1 -1
  4. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/calibrator/calibrator_router.py +7 -2
  5. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/calibrator/mapie_calibrator.py +3 -1
  6. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/calibrator/vennabers_calibrator.py +3 -1
  7. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/create.py +0 -2
  8. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/catboost_model.py +7 -1
  9. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/model.py +5 -0
  10. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/model_router.py +13 -3
  11. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/tabpfn_model.py +7 -1
  12. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/params.py +4 -1
  13. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/base_selector_reducer.py +3 -1
  14. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/combined_reducer.py +9 -8
  15. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/correlation_reducer.py +12 -1
  16. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/nonnumeric_reducer.py +3 -1
  17. wavetrainer-0.0.29/wavetrainer/reducer/smart_correlation_reducer.py +32 -0
  18. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/unseen_reducer.py +3 -1
  19. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/selector/selector.py +3 -1
  20. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/trainer.py +42 -24
  21. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/class_weights.py +3 -1
  22. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/combined_weights.py +4 -2
  23. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/exponential_weights.py +3 -1
  24. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/linear_weights.py +3 -1
  25. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/noop_weights.py +3 -1
  26. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/sigmoid_weights.py +3 -1
  27. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/weights_router.py +3 -1
  28. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/windower/windower.py +3 -1
  29. {wavetrainer-0.0.27 → wavetrainer-0.0.29/wavetrainer.egg-info}/PKG-INFO +1 -1
  30. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer.egg-info/SOURCES.txt +1 -1
  31. wavetrainer-0.0.27/wavetrainer/reducer/pca_reducer.py +0 -77
  32. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/LICENSE +0 -0
  33. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/MANIFEST.in +0 -0
  34. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/README.md +0 -0
  35. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/requirements.txt +0 -0
  36. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/setup.cfg +0 -0
  37. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/tests/__init__.py +0 -0
  38. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/tests/model/__init__.py +0 -0
  39. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/tests/model/catboost_kwargs_test.py +0 -0
  40. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/tests/trainer_test.py +0 -0
  41. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/calibrator/__init__.py +0 -0
  42. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/calibrator/calibrator.py +0 -0
  43. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/exceptions.py +0 -0
  44. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/fit.py +0 -0
  45. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/__init__.py +0 -0
  46. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
  47. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/catboost_kwargs.py +0 -0
  48. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
  49. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model_type.py +0 -0
  50. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/__init__.py +0 -0
  51. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/constant_reducer.py +0 -0
  52. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  53. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/reducer.py +0 -0
  54. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/selector/__init__.py +0 -0
  55. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/__init__.py +0 -0
  56. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/weights.py +0 -0
  57. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/windower/__init__.py +0 -0
  58. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer.egg-info/dependency_links.txt +0 -0
  59. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer.egg-info/not-zip-safe +0 -0
  60. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer.egg-info/requires.txt +0 -0
  61. {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.27
3
+ Version: 0.0.29
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
23
23
 
24
24
  setup(
25
25
  name='wavetrainer',
26
- version='0.0.27',
26
+ version='0.0.29',
27
27
  description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
28
  long_description=long_description,
29
29
  long_description_content_type='text/markdown',
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .create import create
4
4
 
5
- __VERSION__ = "0.0.27"
5
+ __VERSION__ = "0.0.29"
6
6
  __all__ = ("create",)
@@ -36,8 +36,13 @@ class CalibratorRouter(Calibrator):
36
36
  def name(cls) -> str:
37
37
  return "router"
38
38
 
39
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
40
- pass
39
+ def set_options(
40
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
41
+ ) -> None:
42
+ calibrator = self._calibrator
43
+ if calibrator is None:
44
+ return
45
+ calibrator.set_options(trial, df)
41
46
 
42
47
  def load(self, folder: str) -> None:
43
48
  with open(
@@ -29,7 +29,9 @@ class MAPIECalibrator(Calibrator):
29
29
  def name(cls) -> str:
30
30
  return "mapie"
31
31
 
32
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
32
+ def set_options(
33
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
34
+ ) -> None:
33
35
  pass
34
36
 
35
37
  def load(self, folder: str) -> None:
@@ -27,7 +27,9 @@ class VennabersCalibrator(Calibrator):
27
27
  def name(cls) -> str:
28
28
  return "vennabers"
29
29
 
30
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
30
+ def set_options(
31
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
32
+ ) -> None:
31
33
  pass
32
34
 
33
35
  def load(self, folder: str) -> None:
@@ -15,7 +15,6 @@ def create(
15
15
  dt_column: str | None = None,
16
16
  max_train_timeout: datetime.timedelta | None = None,
17
17
  cutoff_dt: datetime.datetime | None = None,
18
- max_features: int | None = None,
19
18
  ) -> Trainer:
20
19
  """Create a trainer."""
21
20
  return Trainer(
@@ -26,5 +25,4 @@ def create(
26
25
  dt_column=dt_column,
27
26
  max_train_timeout=max_train_timeout,
28
27
  cutoff_dt=cutoff_dt,
29
- max_features=max_features,
30
28
  )
@@ -48,6 +48,10 @@ class CatboostModel(Model):
48
48
  def name(cls) -> str:
49
49
  return "catboost"
50
50
 
51
+ @classmethod
52
+ def supports_x(cls, df: pd.DataFrame) -> bool:
53
+ return True
54
+
51
55
  def __init__(self) -> None:
52
56
  super().__init__()
53
57
  self._catboost = None
@@ -86,7 +90,9 @@ class CatboostModel(Model):
86
90
  "sample_weight": w,
87
91
  }
88
92
 
89
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
93
+ def set_options(
94
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
95
+ ) -> None:
90
96
  self._iterations = trial.suggest_int(_ITERATIONS_KEY, 100, 10000)
91
97
  self._learning_rate = trial.suggest_float(_LEARNING_RATE_KEY, 0.001, 0.3)
92
98
  self._depth = trial.suggest_int(_DEPTH_KEY, 1, 10)
@@ -20,6 +20,11 @@ class Model(Params, Fit):
20
20
  """The name of the model."""
21
21
  raise NotImplementedError("name not implemented in parent class.")
22
22
 
23
+ @classmethod
24
+ def supports_x(cls, df: pd.DataFrame) -> bool:
25
+ """Whether the model supports the X values."""
26
+ raise NotImplementedError("supports_x not implemented in parent class.")
27
+
23
28
  @property
24
29
  def estimator(self) -> Any:
25
30
  """The estimator backing the model."""
@@ -34,6 +34,10 @@ class ModelRouter(Model):
34
34
  def name(cls) -> str:
35
35
  return "router"
36
36
 
37
+ @classmethod
38
+ def supports_x(cls, df: pd.DataFrame) -> bool:
39
+ return True
40
+
37
41
  @property
38
42
  def estimator(self) -> Any:
39
43
  model = self._model
@@ -61,9 +65,15 @@ class ModelRouter(Model):
61
65
  raise ValueError("model is null")
62
66
  return model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y, w=w)
63
67
 
64
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
65
- model = _MODELS[trial.suggest_categorical("model", list(_MODELS.keys()))]()
66
- model.set_options(trial)
68
+ def set_options(
69
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
70
+ ) -> None:
71
+ model = _MODELS[
72
+ trial.suggest_categorical(
73
+ "model", [k for k, v in _MODELS.items() if v.supports_x(df)]
74
+ )
75
+ ]()
76
+ model.set_options(trial, df)
67
77
  self._model = model
68
78
 
69
79
  def load(self, folder: str) -> None:
@@ -31,6 +31,10 @@ class TabPFNModel(Model):
31
31
  def name(cls) -> str:
32
32
  return "tabpfn"
33
33
 
34
+ @classmethod
35
+ def supports_x(cls, df: pd.DataFrame) -> bool:
36
+ return len(df.columns.values) < 500
37
+
34
38
  def __init__(self) -> None:
35
39
  super().__init__()
36
40
  self._tabpfn = None
@@ -57,7 +61,9 @@ class TabPFNModel(Model):
57
61
  self._model_type = determine_model_type(y)
58
62
  return {}
59
63
 
60
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
64
+ def set_options(
65
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
66
+ ) -> None:
61
67
  pass
62
68
 
63
69
  def load(self, folder: str) -> None:
@@ -1,12 +1,15 @@
1
1
  """A class for loading/saving parameters."""
2
2
 
3
3
  import optuna
4
+ import pandas as pd
4
5
 
5
6
 
6
7
  class Params:
7
8
  """The params prototype class."""
8
9
 
9
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
10
+ def set_options(
11
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
12
+ ) -> None:
10
13
  """Set the options used in the object."""
11
14
  raise NotImplementedError("set_options not implemented in parent class.")
12
15
 
@@ -32,7 +32,9 @@ class BaseSelectorReducer(Reducer):
32
32
  """Whether the class should raise its exception if it encounters it."""
33
33
  return True
34
34
 
35
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
35
+ def set_options(
36
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
37
+ ) -> None:
36
38
  pass
37
39
 
38
40
  def load(self, folder: str) -> None:
@@ -12,8 +12,8 @@ from .constant_reducer import ConstantReducer
12
12
  from .correlation_reducer import CorrelationReducer
13
13
  from .duplicate_reducer import DuplicateReducer
14
14
  from .nonnumeric_reducer import NonNumericReducer
15
- from .pca_reducer import PCAReducer
16
15
  from .reducer import Reducer
16
+ from .smart_correlation_reducer import SmartCorrelationReducer
17
17
  from .unseen_reducer import UnseenReducer
18
18
 
19
19
  _COMBINED_REDUCER_FILE = "combined_reducer.json"
@@ -25,25 +25,26 @@ class CombinedReducer(Reducer):
25
25
 
26
26
  # pylint: disable=too-many-positional-arguments,too-many-arguments
27
27
 
28
- def __init__(self, max_features: int | None):
28
+ def __init__(self):
29
29
  super().__init__()
30
- self._max_features = max_features
31
30
  self._reducers = [
32
31
  UnseenReducer(),
33
32
  NonNumericReducer(),
34
33
  ConstantReducer(),
35
34
  DuplicateReducer(),
36
35
  CorrelationReducer(),
37
- PCAReducer(max_features),
36
+ SmartCorrelationReducer(),
38
37
  ]
39
38
 
40
39
  @classmethod
41
40
  def name(cls) -> str:
42
41
  return "combined"
43
42
 
44
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
43
+ def set_options(
44
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
45
+ ) -> None:
45
46
  for reducer in self._reducers:
46
- reducer.set_options(trial)
47
+ reducer.set_options(trial, df)
47
48
 
48
49
  def load(self, folder: str) -> None:
49
50
  self._reducers = []
@@ -62,8 +63,8 @@ class CombinedReducer(Reducer):
62
63
  self._reducers.append(NonNumericReducer())
63
64
  elif reducer_name == UnseenReducer.name():
64
65
  self._reducers.append(UnseenReducer())
65
- elif reducer_name == PCAReducer.name():
66
- self._reducers.append(PCAReducer(self._max_features))
66
+ elif reducer_name == SmartCorrelationReducer.name():
67
+ self._reducers.append(SmartCorrelationReducer())
67
68
  for reducer in self._reducers:
68
69
  reducer.load(folder)
69
70
 
@@ -1,18 +1,22 @@
1
1
  """A reducer that removes correlation features."""
2
2
 
3
+ import optuna
4
+ import pandas as pd
3
5
  from feature_engine.selection import DropCorrelatedFeatures
4
6
 
5
7
  from .base_selector_reducer import BaseSelectorReducer
6
8
 
7
9
  _CORRELATION_REDUCER_FILENAME = "correlation_reducer.joblib"
10
+ _CORRELATION_REDUCER_THRESHOLD = "correlation_reducer_threshold"
8
11
 
9
12
 
10
13
  class CorrelationReducer(BaseSelectorReducer):
11
14
  """A class that removes correlated values from a dataset."""
12
15
 
13
16
  def __init__(self) -> None:
17
+ self._correlation_selector = DropCorrelatedFeatures(missing_values="ignore")
14
18
  super().__init__(
15
- DropCorrelatedFeatures(missing_values="ignore"),
19
+ self._correlation_selector,
16
20
  _CORRELATION_REDUCER_FILENAME,
17
21
  )
18
22
 
@@ -23,3 +27,10 @@ class CorrelationReducer(BaseSelectorReducer):
23
27
  @classmethod
24
28
  def should_raise(cls) -> bool:
25
29
  return False
30
+
31
+ def set_options(
32
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
33
+ ) -> None:
34
+ self._correlation_selector.threshold = trial.suggest_float(
35
+ _CORRELATION_REDUCER_THRESHOLD, 0.1, 0.9
36
+ )
@@ -17,7 +17,9 @@ class NonNumericReducer(Reducer):
17
17
  def name(cls) -> str:
18
18
  return "nonnumeric"
19
19
 
20
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
20
+ def set_options(
21
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
22
+ ) -> None:
21
23
  pass
22
24
 
23
25
  def load(self, folder: str) -> None:
@@ -0,0 +1,32 @@
1
+ """A reducer that removes correlation features via further heuristics."""
2
+
3
+ import optuna
4
+ import pandas as pd
5
+ from feature_engine.selection import SmartCorrelatedSelection
6
+
7
+ from .base_selector_reducer import BaseSelectorReducer
8
+
9
+ _SMART_CORRELATION_REDUCER_FILENAME = "smart_correlation_reducer.joblib"
10
+ _SMART_CORRELATION_REDUCER_THRESHOLD = "smart_correlation_reducer_threshold"
11
+
12
+
13
+ class SmartCorrelationReducer(BaseSelectorReducer):
14
+ """A class that removes smart correlated values from a dataset."""
15
+
16
+ def __init__(self) -> None:
17
+ self._correlation_selector = SmartCorrelatedSelection(missing_values="ignore")
18
+ super().__init__(
19
+ self._correlation_selector,
20
+ _SMART_CORRELATION_REDUCER_FILENAME,
21
+ )
22
+
23
+ @classmethod
24
+ def name(cls) -> str:
25
+ return "smart_correlation"
26
+
27
+ def set_options(
28
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
29
+ ) -> None:
30
+ self._correlation_selector.threshold = trial.suggest_float(
31
+ _SMART_CORRELATION_REDUCER_THRESHOLD, 0.1, 0.9
32
+ )
@@ -25,7 +25,9 @@ class UnseenReducer(Reducer):
25
25
  def name(cls) -> str:
26
26
  return "unseen"
27
27
 
28
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
28
+ def set_options(
29
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
30
+ ) -> None:
29
31
  pass
30
32
 
31
33
  def load(self, folder: str) -> None:
@@ -31,7 +31,9 @@ class Selector(Params, Fit):
31
31
  self._steps = 0
32
32
  self._selector = None
33
33
 
34
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
34
+ def set_options(
35
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
36
+ ) -> None:
35
37
  self._feature_ratio = trial.suggest_float("feature_ratio", 0.0, 1.0)
36
38
  self._steps = trial.suggest_int("steps", 1, 10)
37
39
 
@@ -28,6 +28,7 @@ from .windower.windower import Windower
28
28
  _SAMPLER_FILENAME = "sampler.pkl"
29
29
  _STUDYDB_FILENAME = "study.db"
30
30
  _PARAMS_FILENAME = "params.json"
31
+ _TRIAL_FILENAME = "trial.json"
31
32
  _TRIALS_KEY = "trials"
32
33
  _WALKFORWARD_TIMEDELTA_KEY = "walkforward_timedelta"
33
34
  _DAYS_KEY = "days"
@@ -36,7 +37,6 @@ _TEST_SIZE_KEY = "test_size"
36
37
  _VALIDATION_SIZE_KEY = "validation_size"
37
38
  _IDX_USR_ATTR_KEY = "idx"
38
39
  _DT_COLUMN_KEY = "dt_column"
39
- _MAX_FEATURES_KEY = "max_features"
40
40
 
41
41
 
42
42
  class Trainer(Fit):
@@ -54,7 +54,6 @@ class Trainer(Fit):
54
54
  dt_column: str | None = None,
55
55
  max_train_timeout: datetime.timedelta | None = None,
56
56
  cutoff_dt: datetime.datetime | None = None,
57
- max_features: int | None = None,
58
57
  ):
59
58
  tqdm.tqdm.pandas()
60
59
 
@@ -105,7 +104,6 @@ class Trainer(Fit):
105
104
  )
106
105
  if dt_column is None:
107
106
  dt_column = params[_DT_COLUMN_KEY]
108
- max_features = params.get(_MAX_FEATURES_KEY)
109
107
  else:
110
108
  with open(params_file, "w", encoding="utf8") as handle:
111
109
  validation_size_value = None
@@ -136,7 +134,6 @@ class Trainer(Fit):
136
134
  _TEST_SIZE_KEY: test_size_value,
137
135
  _VALIDATION_SIZE_KEY: validation_size_value,
138
136
  _DT_COLUMN_KEY: dt_column,
139
- _MAX_FEATURES_KEY: max_features,
140
137
  },
141
138
  handle,
142
139
  )
@@ -147,7 +144,6 @@ class Trainer(Fit):
147
144
  self._dt_column = dt_column
148
145
  self._max_train_timeout = max_train_timeout
149
146
  self._cutoff_dt = cutoff_dt
150
- self._max_features = max_features
151
147
 
152
148
  def _provide_study(self, column: str) -> optuna.Study:
153
149
  storage_name = f"sqlite:///{self._folder}/{column}/{_STUDYDB_FILENAME}"
@@ -203,6 +199,20 @@ class Trainer(Fit):
203
199
  ) -> float:
204
200
  print(f"Beginning trial for: {split_idx.isoformat()}")
205
201
  trial.set_user_attr(_IDX_USR_ATTR_KEY, split_idx.isoformat())
202
+ folder = os.path.join(
203
+ self._folder, str(y_series.name), split_idx.isoformat()
204
+ )
205
+ os.makedirs(folder, exist_ok=True)
206
+ trial_file = os.path.join(folder, _TRIAL_FILENAME)
207
+ if os.path.exists(trial_file):
208
+ with open(trial_file, encoding="utf8") as handle:
209
+ trial_info = json.load(handle)
210
+ if trial_info["number"] == trial.number:
211
+ logging.info(
212
+ "Found trial %d previously executed, skipping...",
213
+ trial.number,
214
+ )
215
+ return trial_info["output"]
206
216
 
207
217
  train_dt_index = dt_index[: len(x)]
208
218
  x_train = x[train_dt_index < split_idx] # type: ignore
@@ -213,7 +223,7 @@ class Trainer(Fit):
213
223
  try:
214
224
  # Window the data
215
225
  windower = Windower(self._dt_column)
216
- windower.set_options(trial)
226
+ windower.set_options(trial, x)
217
227
  x_train = windower.fit_transform(x_train)
218
228
  y_train = y_train[-len(x_train) :]
219
229
  if len(y_train.unique()) <= 1:
@@ -221,25 +231,25 @@ class Trainer(Fit):
221
231
  return -1.0
222
232
 
223
233
  # Perform common reductions
224
- reducer = CombinedReducer(self._max_features)
225
- reducer.set_options(trial)
234
+ reducer = CombinedReducer()
235
+ reducer.set_options(trial, x)
226
236
  x_train = reducer.fit_transform(x_train)
227
237
  x_test = reducer.transform(x_test)
228
238
 
229
239
  # Calculate the row weights
230
240
  weights = CombinedWeights()
231
- weights.set_options(trial)
241
+ weights.set_options(trial, x)
232
242
  w = weights.fit(x_train, y=y_train).transform(y_train.to_frame())[
233
243
  WEIGHTS_COLUMN
234
244
  ]
235
245
 
236
246
  # Create model
237
247
  model = ModelRouter()
238
- model.set_options(trial)
248
+ model.set_options(trial, x)
239
249
 
240
250
  # Train
241
251
  selector = Selector(model)
242
- selector.set_options(trial)
252
+ selector.set_options(trial, x)
243
253
  selector.fit(x_train, y=y_train, w=w, eval_x=x_test, eval_y=y_test)
244
254
  x_train = selector.transform(x_train)
245
255
  x_test = selector.transform(x_test)
@@ -249,27 +259,35 @@ class Trainer(Fit):
249
259
 
250
260
  # Calibrate
251
261
  calibrator = CalibratorRouter(model)
252
- calibrator.set_options(trial)
262
+ calibrator.set_options(trial, x)
253
263
  calibrator.fit(x_pred, y=y_train)
254
264
 
265
+ # Output
266
+ y_pred = model.transform(x_test)
267
+ y_pred = calibrator.transform(y_pred)
268
+ output = 0.0
269
+ if determine_model_type(y_series) == ModelType.REGRESSION:
270
+ output = float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
271
+ else:
272
+ output = float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
273
+
255
274
  if save:
256
- folder = os.path.join(
257
- self._folder, str(y_series.name), split_idx.isoformat()
258
- )
259
- if not os.path.exists(folder):
260
- os.mkdir(folder)
261
275
  windower.save(folder, trial)
262
276
  reducer.save(folder, trial)
263
277
  weights.save(folder, trial)
264
278
  model.save(folder, trial)
265
279
  selector.save(folder, trial)
266
280
  calibrator.save(folder, trial)
267
-
268
- y_pred = model.transform(x_test)
269
- y_pred = calibrator.transform(y_pred)
270
- if determine_model_type(y_series) == ModelType.REGRESSION:
271
- return float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
272
- return float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
281
+ with open(trial_file, "w", encoding="utf8") as handle:
282
+ json.dump(
283
+ {
284
+ "number": trial.number,
285
+ "output": output,
286
+ },
287
+ handle,
288
+ )
289
+
290
+ return output
273
291
  except WavetrainException as exc:
274
292
  logging.warning(str(exc))
275
293
  return -1.0
@@ -431,7 +449,7 @@ class Trainer(Fit):
431
449
  date_str = dates[-1].isoformat()
432
450
  folder = os.path.join(column_path, date_str)
433
451
 
434
- reducer = CombinedReducer(self._max_features)
452
+ reducer = CombinedReducer()
435
453
  reducer.load(folder)
436
454
 
437
455
  model = ModelRouter()
@@ -27,7 +27,9 @@ class ClassWeights(Weights):
27
27
  """The name of the weight class."""
28
28
  return "class"
29
29
 
30
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
30
+ def set_options(
31
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
32
+ ) -> None:
31
33
  pass
32
34
 
33
35
  def load(self, folder: str) -> None:
@@ -23,9 +23,11 @@ class CombinedWeights(Weights):
23
23
  def name(cls) -> str:
24
24
  return "combined"
25
25
 
26
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
26
+ def set_options(
27
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
28
+ ) -> None:
27
29
  for weights in self._weights:
28
- weights.set_options(trial)
30
+ weights.set_options(trial, df)
29
31
 
30
32
  def load(self, folder: str) -> None:
31
33
  for weights in self._weights:
@@ -19,7 +19,9 @@ class ExponentialWeights(Weights):
19
19
  """The name of the weight class."""
20
20
  return "exponential"
21
21
 
22
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
22
+ def set_options(
23
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
24
+ ) -> None:
23
25
  pass
24
26
 
25
27
  def load(self, folder: str) -> None:
@@ -19,7 +19,9 @@ class LinearWeights(Weights):
19
19
  """The name of the weight class."""
20
20
  return "linear"
21
21
 
22
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
22
+ def set_options(
23
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
24
+ ) -> None:
23
25
  pass
24
26
 
25
27
  def load(self, folder: str) -> None:
@@ -19,7 +19,9 @@ class NoopWeights(Weights):
19
19
  """The name of the weight class."""
20
20
  return "noop"
21
21
 
22
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
22
+ def set_options(
23
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
24
+ ) -> None:
23
25
  pass
24
26
 
25
27
  def load(self, folder: str) -> None:
@@ -20,7 +20,9 @@ class SigmoidWeights(Weights):
20
20
  """The name of the weight class."""
21
21
  return "sigmoid"
22
22
 
23
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
23
+ def set_options(
24
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
25
+ ) -> None:
24
26
  pass
25
27
 
26
28
  def load(self, folder: str) -> None:
@@ -38,7 +38,9 @@ class WeightsRouter(Weights):
38
38
  def name(cls) -> str:
39
39
  return "router"
40
40
 
41
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
41
+ def set_options(
42
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
43
+ ) -> None:
42
44
  self._weights = _WEIGHTS[
43
45
  trial.suggest_categorical("weights", list(_WEIGHTS.keys()))
44
46
  ]()
@@ -28,7 +28,9 @@ class Windower(Params, Fit):
28
28
  self._lookback_ratio = None
29
29
  self._dt_column = dt_column
30
30
 
31
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
31
+ def set_options(
32
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
33
+ ) -> None:
32
34
  self._lookback_ratio = trial.suggest_float("lookback", 0.1, 1.0)
33
35
 
34
36
  def load(self, folder: str) -> None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.27
3
+ Version: 0.0.29
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -40,8 +40,8 @@ wavetrainer/reducer/constant_reducer.py
40
40
  wavetrainer/reducer/correlation_reducer.py
41
41
  wavetrainer/reducer/duplicate_reducer.py
42
42
  wavetrainer/reducer/nonnumeric_reducer.py
43
- wavetrainer/reducer/pca_reducer.py
44
43
  wavetrainer/reducer/reducer.py
44
+ wavetrainer/reducer/smart_correlation_reducer.py
45
45
  wavetrainer/reducer/unseen_reducer.py
46
46
  wavetrainer/selector/__init__.py
47
47
  wavetrainer/selector/selector.py
@@ -1,77 +0,0 @@
1
- """A reducer that removes low variance columns."""
2
-
3
- import os
4
- from typing import Self
5
-
6
- import joblib # type: ignore
7
- import optuna
8
- import pandas as pd
9
- from sklearn.decomposition import PCA # type: ignore
10
- from sklearn.preprocessing import StandardScaler # type: ignore
11
-
12
- from .reducer import Reducer
13
-
14
- _PCA_FILE = "pca.joblib"
15
- _PCA_SCALER_FILE = "pca_scaler.joblib"
16
-
17
-
18
- class PCAReducer(Reducer):
19
- """A class that removes low variance columns from a dataframe."""
20
-
21
- # pylint: disable=too-many-positional-arguments,too-many-arguments
22
-
23
- def __init__(self, max_features: int | None):
24
- super().__init__()
25
- self._max_features = max_features
26
- if max_features is not None:
27
- self._scaler = StandardScaler()
28
- self._pca = PCA(n_components=max_features)
29
- else:
30
- self._scaler = None
31
- self._pca = None
32
-
33
- @classmethod
34
- def name(cls) -> str:
35
- return "pca"
36
-
37
- def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
38
- pass
39
-
40
- def load(self, folder: str) -> None:
41
- pca_scaler_file = os.path.join(folder, _PCA_SCALER_FILE)
42
- pca_file = os.path.join(folder, _PCA_FILE)
43
- if os.path.exists(pca_scaler_file):
44
- self._scaler = joblib.load(pca_scaler_file)
45
- if os.path.exists(pca_file):
46
- self._pca = joblib.load(pca_file)
47
-
48
- def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
49
- if self._scaler is not None:
50
- joblib.dump(self._scaler, os.path.join(folder, _PCA_SCALER_FILE))
51
- if self._pca is not None:
52
- joblib.dump(self._pca, os.path.join(folder, _PCA_FILE))
53
-
54
- def fit(
55
- self,
56
- df: pd.DataFrame,
57
- y: pd.Series | pd.DataFrame | None = None,
58
- w: pd.Series | None = None,
59
- eval_x: pd.DataFrame | None = None,
60
- eval_y: pd.Series | pd.DataFrame | None = None,
61
- ) -> Self:
62
- pca = self._pca
63
- scaler = self._scaler
64
- if pca is None or scaler is None:
65
- return self
66
- if len(df.columns.values) < pca.n_components: # type: ignore
67
- return self
68
- x_scaled = scaler.fit_transform(df)
69
- pca.fit(x_scaled)
70
- return self
71
-
72
- def transform(self, df: pd.DataFrame) -> pd.DataFrame:
73
- if self._pca is None:
74
- return df
75
- if len(df.columns.values) < self._pca.n_components: # type: ignore
76
- return df
77
- return self._pca.transform(df)
File without changes
File without changes
File without changes
File without changes