wavetrainer 0.0.27__tar.gz → 0.0.28__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.0.27/wavetrainer.egg-info → wavetrainer-0.0.28}/PKG-INFO +1 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/setup.py +1 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/__init__.py +1 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/calibrator/calibrator_router.py +7 -2
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/calibrator/mapie_calibrator.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/calibrator/vennabers_calibrator.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/create.py +0 -2
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/model/catboost_model.py +7 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/model/model.py +5 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/model/model_router.py +13 -3
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/model/tabpfn_model.py +7 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/params.py +4 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/reducer/base_selector_reducer.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/reducer/combined_reducer.py +9 -8
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/reducer/correlation_reducer.py +12 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/reducer/nonnumeric_reducer.py +3 -1
- wavetrainer-0.0.28/wavetrainer/reducer/smart_correlation_reducer.py +32 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/reducer/unseen_reducer.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/selector/selector.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/trainer.py +8 -13
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/weights/class_weights.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/weights/combined_weights.py +4 -2
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/weights/exponential_weights.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/weights/linear_weights.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/weights/noop_weights.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/weights/sigmoid_weights.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/weights/weights_router.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/windower/windower.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28/wavetrainer.egg-info}/PKG-INFO +1 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer.egg-info/SOURCES.txt +1 -1
- wavetrainer-0.0.27/wavetrainer/reducer/pca_reducer.py +0 -77
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/LICENSE +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/MANIFEST.in +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/README.md +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/requirements.txt +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/setup.cfg +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/tests/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/tests/model/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/tests/model/catboost_kwargs_test.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/tests/trainer_test.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/model/catboost_kwargs.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/model_type.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer.egg-info/requires.txt +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.28}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
|
|
23
23
|
|
24
24
|
setup(
|
25
25
|
name='wavetrainer',
|
26
|
-
version='0.0.27',
|
26
|
+
version='0.0.28',
|
27
27
|
description='A library for automatically finding the optimal model within feature and hyperparameter space.',
|
28
28
|
long_description=long_description,
|
29
29
|
long_description_content_type='text/markdown',
|
@@ -36,8 +36,13 @@ class CalibratorRouter(Calibrator):
|
|
36
36
|
def name(cls) -> str:
|
37
37
|
return "router"
|
38
38
|
|
39
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
40
|
-
|
39
|
+
def set_options(
|
40
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
41
|
+
) -> None:
|
42
|
+
calibrator = self._calibrator
|
43
|
+
if calibrator is None:
|
44
|
+
return
|
45
|
+
calibrator.set_options(trial, df)
|
41
46
|
|
42
47
|
def load(self, folder: str) -> None:
|
43
48
|
with open(
|
@@ -29,7 +29,9 @@ class MAPIECalibrator(Calibrator):
|
|
29
29
|
def name(cls) -> str:
|
30
30
|
return "mapie"
|
31
31
|
|
32
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
32
|
+
def set_options(
|
33
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
34
|
+
) -> None:
|
33
35
|
pass
|
34
36
|
|
35
37
|
def load(self, folder: str) -> None:
|
@@ -27,7 +27,9 @@ class VennabersCalibrator(Calibrator):
|
|
27
27
|
def name(cls) -> str:
|
28
28
|
return "vennabers"
|
29
29
|
|
30
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
30
|
+
def set_options(
|
31
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
32
|
+
) -> None:
|
31
33
|
pass
|
32
34
|
|
33
35
|
def load(self, folder: str) -> None:
|
@@ -15,7 +15,6 @@ def create(
|
|
15
15
|
dt_column: str | None = None,
|
16
16
|
max_train_timeout: datetime.timedelta | None = None,
|
17
17
|
cutoff_dt: datetime.datetime | None = None,
|
18
|
-
max_features: int | None = None,
|
19
18
|
) -> Trainer:
|
20
19
|
"""Create a trainer."""
|
21
20
|
return Trainer(
|
@@ -26,5 +25,4 @@ def create(
|
|
26
25
|
dt_column=dt_column,
|
27
26
|
max_train_timeout=max_train_timeout,
|
28
27
|
cutoff_dt=cutoff_dt,
|
29
|
-
max_features=max_features,
|
30
28
|
)
|
@@ -48,6 +48,10 @@ class CatboostModel(Model):
|
|
48
48
|
def name(cls) -> str:
|
49
49
|
return "catboost"
|
50
50
|
|
51
|
+
@classmethod
|
52
|
+
def supports_x(cls, df: pd.DataFrame) -> bool:
|
53
|
+
return True
|
54
|
+
|
51
55
|
def __init__(self) -> None:
|
52
56
|
super().__init__()
|
53
57
|
self._catboost = None
|
@@ -86,7 +90,9 @@ class CatboostModel(Model):
|
|
86
90
|
"sample_weight": w,
|
87
91
|
}
|
88
92
|
|
89
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
93
|
+
def set_options(
|
94
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
95
|
+
) -> None:
|
90
96
|
self._iterations = trial.suggest_int(_ITERATIONS_KEY, 100, 10000)
|
91
97
|
self._learning_rate = trial.suggest_float(_LEARNING_RATE_KEY, 0.001, 0.3)
|
92
98
|
self._depth = trial.suggest_int(_DEPTH_KEY, 1, 10)
|
@@ -20,6 +20,11 @@ class Model(Params, Fit):
|
|
20
20
|
"""The name of the model."""
|
21
21
|
raise NotImplementedError("name not implemented in parent class.")
|
22
22
|
|
23
|
+
@classmethod
|
24
|
+
def supports_x(cls, df: pd.DataFrame) -> bool:
|
25
|
+
"""Whether the model supports the X values."""
|
26
|
+
raise NotImplementedError("supports_x not implemented in parent class.")
|
27
|
+
|
23
28
|
@property
|
24
29
|
def estimator(self) -> Any:
|
25
30
|
"""The estimator backing the model."""
|
@@ -34,6 +34,10 @@ class ModelRouter(Model):
|
|
34
34
|
def name(cls) -> str:
|
35
35
|
return "router"
|
36
36
|
|
37
|
+
@classmethod
|
38
|
+
def supports_x(cls, df: pd.DataFrame) -> bool:
|
39
|
+
return True
|
40
|
+
|
37
41
|
@property
|
38
42
|
def estimator(self) -> Any:
|
39
43
|
model = self._model
|
@@ -61,9 +65,15 @@ class ModelRouter(Model):
|
|
61
65
|
raise ValueError("model is null")
|
62
66
|
return model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y, w=w)
|
63
67
|
|
64
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
65
|
-
|
66
|
-
|
68
|
+
def set_options(
|
69
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
70
|
+
) -> None:
|
71
|
+
model = _MODELS[
|
72
|
+
trial.suggest_categorical(
|
73
|
+
"model", [k for k, v in _MODELS.items() if v.supports_x(df)]
|
74
|
+
)
|
75
|
+
]()
|
76
|
+
model.set_options(trial, df)
|
67
77
|
self._model = model
|
68
78
|
|
69
79
|
def load(self, folder: str) -> None:
|
@@ -31,6 +31,10 @@ class TabPFNModel(Model):
|
|
31
31
|
def name(cls) -> str:
|
32
32
|
return "tabpfn"
|
33
33
|
|
34
|
+
@classmethod
|
35
|
+
def supports_x(cls, df: pd.DataFrame) -> bool:
|
36
|
+
return len(df.columns.values) < 500
|
37
|
+
|
34
38
|
def __init__(self) -> None:
|
35
39
|
super().__init__()
|
36
40
|
self._tabpfn = None
|
@@ -57,7 +61,9 @@ class TabPFNModel(Model):
|
|
57
61
|
self._model_type = determine_model_type(y)
|
58
62
|
return {}
|
59
63
|
|
60
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
64
|
+
def set_options(
|
65
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
66
|
+
) -> None:
|
61
67
|
pass
|
62
68
|
|
63
69
|
def load(self, folder: str) -> None:
|
@@ -1,12 +1,15 @@
|
|
1
1
|
"""A class for loading/saving parameters."""
|
2
2
|
|
3
3
|
import optuna
|
4
|
+
import pandas as pd
|
4
5
|
|
5
6
|
|
6
7
|
class Params:
|
7
8
|
"""The params prototype class."""
|
8
9
|
|
9
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
10
|
+
def set_options(
|
11
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
12
|
+
) -> None:
|
10
13
|
"""Set the options used in the object."""
|
11
14
|
raise NotImplementedError("set_options not implemented in parent class.")
|
12
15
|
|
@@ -32,7 +32,9 @@ class BaseSelectorReducer(Reducer):
|
|
32
32
|
"""Whether the class should raise its exception if it encounters it."""
|
33
33
|
return True
|
34
34
|
|
35
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
35
|
+
def set_options(
|
36
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
37
|
+
) -> None:
|
36
38
|
pass
|
37
39
|
|
38
40
|
def load(self, folder: str) -> None:
|
@@ -12,8 +12,8 @@ from .constant_reducer import ConstantReducer
|
|
12
12
|
from .correlation_reducer import CorrelationReducer
|
13
13
|
from .duplicate_reducer import DuplicateReducer
|
14
14
|
from .nonnumeric_reducer import NonNumericReducer
|
15
|
-
from .pca_reducer import PCAReducer
|
16
15
|
from .reducer import Reducer
|
16
|
+
from .smart_correlation_reducer import SmartCorrelationReducer
|
17
17
|
from .unseen_reducer import UnseenReducer
|
18
18
|
|
19
19
|
_COMBINED_REDUCER_FILE = "combined_reducer.json"
|
@@ -25,25 +25,26 @@ class CombinedReducer(Reducer):
|
|
25
25
|
|
26
26
|
# pylint: disable=too-many-positional-arguments,too-many-arguments
|
27
27
|
|
28
|
-
def __init__(self, max_features: int | None):
|
28
|
+
def __init__(self):
|
29
29
|
super().__init__()
|
30
|
-
self._max_features = max_features
|
31
30
|
self._reducers = [
|
32
31
|
UnseenReducer(),
|
33
32
|
NonNumericReducer(),
|
34
33
|
ConstantReducer(),
|
35
34
|
DuplicateReducer(),
|
36
35
|
CorrelationReducer(),
|
37
|
-
|
36
|
+
SmartCorrelationReducer(),
|
38
37
|
]
|
39
38
|
|
40
39
|
@classmethod
|
41
40
|
def name(cls) -> str:
|
42
41
|
return "combined"
|
43
42
|
|
44
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
43
|
+
def set_options(
|
44
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
45
|
+
) -> None:
|
45
46
|
for reducer in self._reducers:
|
46
|
-
reducer.set_options(trial)
|
47
|
+
reducer.set_options(trial, df)
|
47
48
|
|
48
49
|
def load(self, folder: str) -> None:
|
49
50
|
self._reducers = []
|
@@ -62,8 +63,8 @@ class CombinedReducer(Reducer):
|
|
62
63
|
self._reducers.append(NonNumericReducer())
|
63
64
|
elif reducer_name == UnseenReducer.name():
|
64
65
|
self._reducers.append(UnseenReducer())
|
65
|
-
elif reducer_name == PCAReducer.name():
|
66
|
-
self._reducers.append(PCAReducer(self._max_features))
|
66
|
+
elif reducer_name == SmartCorrelationReducer.name():
|
67
|
+
self._reducers.append(SmartCorrelationReducer())
|
67
68
|
for reducer in self._reducers:
|
68
69
|
reducer.load(folder)
|
69
70
|
|
@@ -1,18 +1,22 @@
|
|
1
1
|
"""A reducer that removes correlation features."""
|
2
2
|
|
3
|
+
import optuna
|
4
|
+
import pandas as pd
|
3
5
|
from feature_engine.selection import DropCorrelatedFeatures
|
4
6
|
|
5
7
|
from .base_selector_reducer import BaseSelectorReducer
|
6
8
|
|
7
9
|
_CORRELATION_REDUCER_FILENAME = "correlation_reducer.joblib"
|
10
|
+
_CORRELATION_REDUCER_THRESHOLD = "correlation_reducer_threshold"
|
8
11
|
|
9
12
|
|
10
13
|
class CorrelationReducer(BaseSelectorReducer):
|
11
14
|
"""A class that removes correlated values from a dataset."""
|
12
15
|
|
13
16
|
def __init__(self) -> None:
|
17
|
+
self._correlation_selector = DropCorrelatedFeatures(missing_values="ignore")
|
14
18
|
super().__init__(
|
15
|
-
|
19
|
+
self._correlation_selector,
|
16
20
|
_CORRELATION_REDUCER_FILENAME,
|
17
21
|
)
|
18
22
|
|
@@ -23,3 +27,10 @@ class CorrelationReducer(BaseSelectorReducer):
|
|
23
27
|
@classmethod
|
24
28
|
def should_raise(cls) -> bool:
|
25
29
|
return False
|
30
|
+
|
31
|
+
def set_options(
|
32
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
33
|
+
) -> None:
|
34
|
+
self._correlation_selector.threshold = trial.suggest_float(
|
35
|
+
_CORRELATION_REDUCER_THRESHOLD, 0.1, 0.9
|
36
|
+
)
|
@@ -17,7 +17,9 @@ class NonNumericReducer(Reducer):
|
|
17
17
|
def name(cls) -> str:
|
18
18
|
return "nonnumeric"
|
19
19
|
|
20
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
20
|
+
def set_options(
|
21
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
22
|
+
) -> None:
|
21
23
|
pass
|
22
24
|
|
23
25
|
def load(self, folder: str) -> None:
|
@@ -0,0 +1,32 @@
|
|
1
|
+
"""A reducer that removes correlation features via further heuristics."""
|
2
|
+
|
3
|
+
import optuna
|
4
|
+
import pandas as pd
|
5
|
+
from feature_engine.selection import SmartCorrelatedSelection
|
6
|
+
|
7
|
+
from .base_selector_reducer import BaseSelectorReducer
|
8
|
+
|
9
|
+
_SMART_CORRELATION_REDUCER_FILENAME = "smart_correlation_reducer.joblib"
|
10
|
+
_SMART_CORRELATION_REDUCER_THRESHOLD = "smart_correlation_reducer_threshold"
|
11
|
+
|
12
|
+
|
13
|
+
class SmartCorrelationReducer(BaseSelectorReducer):
|
14
|
+
"""A class that removes smart correlated values from a dataset."""
|
15
|
+
|
16
|
+
def __init__(self) -> None:
|
17
|
+
self._correlation_selector = SmartCorrelatedSelection(missing_values="ignore")
|
18
|
+
super().__init__(
|
19
|
+
self._correlation_selector,
|
20
|
+
_SMART_CORRELATION_REDUCER_FILENAME,
|
21
|
+
)
|
22
|
+
|
23
|
+
@classmethod
|
24
|
+
def name(cls) -> str:
|
25
|
+
return "smart_correlation"
|
26
|
+
|
27
|
+
def set_options(
|
28
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
29
|
+
) -> None:
|
30
|
+
self._correlation_selector.threshold = trial.suggest_float(
|
31
|
+
_SMART_CORRELATION_REDUCER_THRESHOLD, 0.1, 0.9
|
32
|
+
)
|
@@ -25,7 +25,9 @@ class UnseenReducer(Reducer):
|
|
25
25
|
def name(cls) -> str:
|
26
26
|
return "unseen"
|
27
27
|
|
28
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
28
|
+
def set_options(
|
29
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
30
|
+
) -> None:
|
29
31
|
pass
|
30
32
|
|
31
33
|
def load(self, folder: str) -> None:
|
@@ -31,7 +31,9 @@ class Selector(Params, Fit):
|
|
31
31
|
self._steps = 0
|
32
32
|
self._selector = None
|
33
33
|
|
34
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
34
|
+
def set_options(
|
35
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
36
|
+
) -> None:
|
35
37
|
self._feature_ratio = trial.suggest_float("feature_ratio", 0.0, 1.0)
|
36
38
|
self._steps = trial.suggest_int("steps", 1, 10)
|
37
39
|
|
@@ -36,7 +36,6 @@ _TEST_SIZE_KEY = "test_size"
|
|
36
36
|
_VALIDATION_SIZE_KEY = "validation_size"
|
37
37
|
_IDX_USR_ATTR_KEY = "idx"
|
38
38
|
_DT_COLUMN_KEY = "dt_column"
|
39
|
-
_MAX_FEATURES_KEY = "max_features"
|
40
39
|
|
41
40
|
|
42
41
|
class Trainer(Fit):
|
@@ -54,7 +53,6 @@ class Trainer(Fit):
|
|
54
53
|
dt_column: str | None = None,
|
55
54
|
max_train_timeout: datetime.timedelta | None = None,
|
56
55
|
cutoff_dt: datetime.datetime | None = None,
|
57
|
-
max_features: int | None = None,
|
58
56
|
):
|
59
57
|
tqdm.tqdm.pandas()
|
60
58
|
|
@@ -105,7 +103,6 @@ class Trainer(Fit):
|
|
105
103
|
)
|
106
104
|
if dt_column is None:
|
107
105
|
dt_column = params[_DT_COLUMN_KEY]
|
108
|
-
max_features = params.get(_MAX_FEATURES_KEY)
|
109
106
|
else:
|
110
107
|
with open(params_file, "w", encoding="utf8") as handle:
|
111
108
|
validation_size_value = None
|
@@ -136,7 +133,6 @@ class Trainer(Fit):
|
|
136
133
|
_TEST_SIZE_KEY: test_size_value,
|
137
134
|
_VALIDATION_SIZE_KEY: validation_size_value,
|
138
135
|
_DT_COLUMN_KEY: dt_column,
|
139
|
-
_MAX_FEATURES_KEY: max_features,
|
140
136
|
},
|
141
137
|
handle,
|
142
138
|
)
|
@@ -147,7 +143,6 @@ class Trainer(Fit):
|
|
147
143
|
self._dt_column = dt_column
|
148
144
|
self._max_train_timeout = max_train_timeout
|
149
145
|
self._cutoff_dt = cutoff_dt
|
150
|
-
self._max_features = max_features
|
151
146
|
|
152
147
|
def _provide_study(self, column: str) -> optuna.Study:
|
153
148
|
storage_name = f"sqlite:///{self._folder}/{column}/{_STUDYDB_FILENAME}"
|
@@ -213,7 +208,7 @@ class Trainer(Fit):
|
|
213
208
|
try:
|
214
209
|
# Window the data
|
215
210
|
windower = Windower(self._dt_column)
|
216
|
-
windower.set_options(trial)
|
211
|
+
windower.set_options(trial, x)
|
217
212
|
x_train = windower.fit_transform(x_train)
|
218
213
|
y_train = y_train[-len(x_train) :]
|
219
214
|
if len(y_train.unique()) <= 1:
|
@@ -221,25 +216,25 @@ class Trainer(Fit):
|
|
221
216
|
return -1.0
|
222
217
|
|
223
218
|
# Perform common reductions
|
224
|
-
reducer = CombinedReducer(self._max_features)
|
225
|
-
reducer.set_options(trial)
|
219
|
+
reducer = CombinedReducer()
|
220
|
+
reducer.set_options(trial, x)
|
226
221
|
x_train = reducer.fit_transform(x_train)
|
227
222
|
x_test = reducer.transform(x_test)
|
228
223
|
|
229
224
|
# Calculate the row weights
|
230
225
|
weights = CombinedWeights()
|
231
|
-
weights.set_options(trial)
|
226
|
+
weights.set_options(trial, x)
|
232
227
|
w = weights.fit(x_train, y=y_train).transform(y_train.to_frame())[
|
233
228
|
WEIGHTS_COLUMN
|
234
229
|
]
|
235
230
|
|
236
231
|
# Create model
|
237
232
|
model = ModelRouter()
|
238
|
-
model.set_options(trial)
|
233
|
+
model.set_options(trial, x)
|
239
234
|
|
240
235
|
# Train
|
241
236
|
selector = Selector(model)
|
242
|
-
selector.set_options(trial)
|
237
|
+
selector.set_options(trial, x)
|
243
238
|
selector.fit(x_train, y=y_train, w=w, eval_x=x_test, eval_y=y_test)
|
244
239
|
x_train = selector.transform(x_train)
|
245
240
|
x_test = selector.transform(x_test)
|
@@ -249,7 +244,7 @@ class Trainer(Fit):
|
|
249
244
|
|
250
245
|
# Calibrate
|
251
246
|
calibrator = CalibratorRouter(model)
|
252
|
-
calibrator.set_options(trial)
|
247
|
+
calibrator.set_options(trial, x)
|
253
248
|
calibrator.fit(x_pred, y=y_train)
|
254
249
|
|
255
250
|
if save:
|
@@ -431,7 +426,7 @@ class Trainer(Fit):
|
|
431
426
|
date_str = dates[-1].isoformat()
|
432
427
|
folder = os.path.join(column_path, date_str)
|
433
428
|
|
434
|
-
reducer = CombinedReducer(self._max_features)
|
429
|
+
reducer = CombinedReducer()
|
435
430
|
reducer.load(folder)
|
436
431
|
|
437
432
|
model = ModelRouter()
|
@@ -27,7 +27,9 @@ class ClassWeights(Weights):
|
|
27
27
|
"""The name of the weight class."""
|
28
28
|
return "class"
|
29
29
|
|
30
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
30
|
+
def set_options(
|
31
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
32
|
+
) -> None:
|
31
33
|
pass
|
32
34
|
|
33
35
|
def load(self, folder: str) -> None:
|
@@ -23,9 +23,11 @@ class CombinedWeights(Weights):
|
|
23
23
|
def name(cls) -> str:
|
24
24
|
return "combined"
|
25
25
|
|
26
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
26
|
+
def set_options(
|
27
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
28
|
+
) -> None:
|
27
29
|
for weights in self._weights:
|
28
|
-
weights.set_options(trial)
|
30
|
+
weights.set_options(trial, df)
|
29
31
|
|
30
32
|
def load(self, folder: str) -> None:
|
31
33
|
for weights in self._weights:
|
@@ -19,7 +19,9 @@ class ExponentialWeights(Weights):
|
|
19
19
|
"""The name of the weight class."""
|
20
20
|
return "exponential"
|
21
21
|
|
22
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
22
|
+
def set_options(
|
23
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
24
|
+
) -> None:
|
23
25
|
pass
|
24
26
|
|
25
27
|
def load(self, folder: str) -> None:
|
@@ -19,7 +19,9 @@ class LinearWeights(Weights):
|
|
19
19
|
"""The name of the weight class."""
|
20
20
|
return "linear"
|
21
21
|
|
22
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
22
|
+
def set_options(
|
23
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
24
|
+
) -> None:
|
23
25
|
pass
|
24
26
|
|
25
27
|
def load(self, folder: str) -> None:
|
@@ -19,7 +19,9 @@ class NoopWeights(Weights):
|
|
19
19
|
"""The name of the weight class."""
|
20
20
|
return "noop"
|
21
21
|
|
22
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
22
|
+
def set_options(
|
23
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
24
|
+
) -> None:
|
23
25
|
pass
|
24
26
|
|
25
27
|
def load(self, folder: str) -> None:
|
@@ -20,7 +20,9 @@ class SigmoidWeights(Weights):
|
|
20
20
|
"""The name of the weight class."""
|
21
21
|
return "sigmoid"
|
22
22
|
|
23
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
23
|
+
def set_options(
|
24
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
25
|
+
) -> None:
|
24
26
|
pass
|
25
27
|
|
26
28
|
def load(self, folder: str) -> None:
|
@@ -38,7 +38,9 @@ class WeightsRouter(Weights):
|
|
38
38
|
def name(cls) -> str:
|
39
39
|
return "router"
|
40
40
|
|
41
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
41
|
+
def set_options(
|
42
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
43
|
+
) -> None:
|
42
44
|
self._weights = _WEIGHTS[
|
43
45
|
trial.suggest_categorical("weights", list(_WEIGHTS.keys()))
|
44
46
|
]()
|
@@ -28,7 +28,9 @@ class Windower(Params, Fit):
|
|
28
28
|
self._lookback_ratio = None
|
29
29
|
self._dt_column = dt_column
|
30
30
|
|
31
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
31
|
+
def set_options(
|
32
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
33
|
+
) -> None:
|
32
34
|
self._lookback_ratio = trial.suggest_float("lookback", 0.1, 1.0)
|
33
35
|
|
34
36
|
def load(self, folder: str) -> None:
|
@@ -40,8 +40,8 @@ wavetrainer/reducer/constant_reducer.py
|
|
40
40
|
wavetrainer/reducer/correlation_reducer.py
|
41
41
|
wavetrainer/reducer/duplicate_reducer.py
|
42
42
|
wavetrainer/reducer/nonnumeric_reducer.py
|
43
|
-
wavetrainer/reducer/pca_reducer.py
|
44
43
|
wavetrainer/reducer/reducer.py
|
44
|
+
wavetrainer/reducer/smart_correlation_reducer.py
|
45
45
|
wavetrainer/reducer/unseen_reducer.py
|
46
46
|
wavetrainer/selector/__init__.py
|
47
47
|
wavetrainer/selector/selector.py
|
@@ -1,77 +0,0 @@
|
|
1
|
-
"""A reducer that removes low variance columns."""
|
2
|
-
|
3
|
-
import os
|
4
|
-
from typing import Self
|
5
|
-
|
6
|
-
import joblib # type: ignore
|
7
|
-
import optuna
|
8
|
-
import pandas as pd
|
9
|
-
from sklearn.decomposition import PCA # type: ignore
|
10
|
-
from sklearn.preprocessing import StandardScaler # type: ignore
|
11
|
-
|
12
|
-
from .reducer import Reducer
|
13
|
-
|
14
|
-
_PCA_FILE = "pca.joblib"
|
15
|
-
_PCA_SCALER_FILE = "pca_scaler.joblib"
|
16
|
-
|
17
|
-
|
18
|
-
class PCAReducer(Reducer):
|
19
|
-
"""A class that removes low variance columns from a dataframe."""
|
20
|
-
|
21
|
-
# pylint: disable=too-many-positional-arguments,too-many-arguments
|
22
|
-
|
23
|
-
def __init__(self, max_features: int | None):
|
24
|
-
super().__init__()
|
25
|
-
self._max_features = max_features
|
26
|
-
if max_features is not None:
|
27
|
-
self._scaler = StandardScaler()
|
28
|
-
self._pca = PCA(n_components=max_features)
|
29
|
-
else:
|
30
|
-
self._scaler = None
|
31
|
-
self._pca = None
|
32
|
-
|
33
|
-
@classmethod
|
34
|
-
def name(cls) -> str:
|
35
|
-
return "pca"
|
36
|
-
|
37
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
38
|
-
pass
|
39
|
-
|
40
|
-
def load(self, folder: str) -> None:
|
41
|
-
pca_scaler_file = os.path.join(folder, _PCA_SCALER_FILE)
|
42
|
-
pca_file = os.path.join(folder, _PCA_FILE)
|
43
|
-
if os.path.exists(pca_scaler_file):
|
44
|
-
self._scaler = joblib.load(pca_scaler_file)
|
45
|
-
if os.path.exists(pca_file):
|
46
|
-
self._pca = joblib.load(pca_file)
|
47
|
-
|
48
|
-
def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
49
|
-
if self._scaler is not None:
|
50
|
-
joblib.dump(self._scaler, os.path.join(folder, _PCA_SCALER_FILE))
|
51
|
-
if self._pca is not None:
|
52
|
-
joblib.dump(self._pca, os.path.join(folder, _PCA_FILE))
|
53
|
-
|
54
|
-
def fit(
|
55
|
-
self,
|
56
|
-
df: pd.DataFrame,
|
57
|
-
y: pd.Series | pd.DataFrame | None = None,
|
58
|
-
w: pd.Series | None = None,
|
59
|
-
eval_x: pd.DataFrame | None = None,
|
60
|
-
eval_y: pd.Series | pd.DataFrame | None = None,
|
61
|
-
) -> Self:
|
62
|
-
pca = self._pca
|
63
|
-
scaler = self._scaler
|
64
|
-
if pca is None or scaler is None:
|
65
|
-
return self
|
66
|
-
if len(df.columns.values) < pca.n_components: # type: ignore
|
67
|
-
return self
|
68
|
-
x_scaled = scaler.fit_transform(df)
|
69
|
-
pca.fit(x_scaled)
|
70
|
-
return self
|
71
|
-
|
72
|
-
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
73
|
-
if self._pca is None:
|
74
|
-
return df
|
75
|
-
if len(df.columns.values) < self._pca.n_components: # type: ignore
|
76
|
-
return df
|
77
|
-
return self._pca.transform(df)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|