wavetrainer 0.0.27__tar.gz → 0.0.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.0.27/wavetrainer.egg-info → wavetrainer-0.0.29}/PKG-INFO +1 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/setup.py +1 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/__init__.py +1 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/calibrator/calibrator_router.py +7 -2
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/calibrator/mapie_calibrator.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/calibrator/vennabers_calibrator.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/create.py +0 -2
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/catboost_model.py +7 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/model.py +5 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/model_router.py +13 -3
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/tabpfn_model.py +7 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/params.py +4 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/base_selector_reducer.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/combined_reducer.py +9 -8
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/correlation_reducer.py +12 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/nonnumeric_reducer.py +3 -1
- wavetrainer-0.0.29/wavetrainer/reducer/smart_correlation_reducer.py +32 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/unseen_reducer.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/selector/selector.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/trainer.py +42 -24
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/class_weights.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/combined_weights.py +4 -2
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/exponential_weights.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/linear_weights.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/noop_weights.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/sigmoid_weights.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/weights_router.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/windower/windower.py +3 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29/wavetrainer.egg-info}/PKG-INFO +1 -1
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer.egg-info/SOURCES.txt +1 -1
- wavetrainer-0.0.27/wavetrainer/reducer/pca_reducer.py +0 -77
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/LICENSE +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/MANIFEST.in +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/README.md +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/requirements.txt +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/setup.cfg +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/tests/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/tests/model/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/tests/model/catboost_kwargs_test.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/tests/trainer_test.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/catboost_kwargs.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/model_type.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer.egg-info/requires.txt +0 -0
- {wavetrainer-0.0.27 → wavetrainer-0.0.29}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
|
|
23
23
|
|
24
24
|
setup(
|
25
25
|
name='wavetrainer',
|
26
|
-
version='0.0.27',
|
26
|
+
version='0.0.29',
|
27
27
|
description='A library for automatically finding the optimal model within feature and hyperparameter space.',
|
28
28
|
long_description=long_description,
|
29
29
|
long_description_content_type='text/markdown',
|
@@ -36,8 +36,13 @@ class CalibratorRouter(Calibrator):
|
|
36
36
|
def name(cls) -> str:
|
37
37
|
return "router"
|
38
38
|
|
39
|
-
def set_options(
|
40
|
-
|
39
|
+
def set_options(
|
40
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
41
|
+
) -> None:
|
42
|
+
calibrator = self._calibrator
|
43
|
+
if calibrator is None:
|
44
|
+
return
|
45
|
+
calibrator.set_options(trial, df)
|
41
46
|
|
42
47
|
def load(self, folder: str) -> None:
|
43
48
|
with open(
|
@@ -29,7 +29,9 @@ class MAPIECalibrator(Calibrator):
|
|
29
29
|
def name(cls) -> str:
|
30
30
|
return "mapie"
|
31
31
|
|
32
|
-
def set_options(
|
32
|
+
def set_options(
|
33
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
34
|
+
) -> None:
|
33
35
|
pass
|
34
36
|
|
35
37
|
def load(self, folder: str) -> None:
|
@@ -27,7 +27,9 @@ class VennabersCalibrator(Calibrator):
|
|
27
27
|
def name(cls) -> str:
|
28
28
|
return "vennabers"
|
29
29
|
|
30
|
-
def set_options(
|
30
|
+
def set_options(
|
31
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
32
|
+
) -> None:
|
31
33
|
pass
|
32
34
|
|
33
35
|
def load(self, folder: str) -> None:
|
@@ -15,7 +15,6 @@ def create(
|
|
15
15
|
dt_column: str | None = None,
|
16
16
|
max_train_timeout: datetime.timedelta | None = None,
|
17
17
|
cutoff_dt: datetime.datetime | None = None,
|
18
|
-
max_features: int | None = None,
|
19
18
|
) -> Trainer:
|
20
19
|
"""Create a trainer."""
|
21
20
|
return Trainer(
|
@@ -26,5 +25,4 @@ def create(
|
|
26
25
|
dt_column=dt_column,
|
27
26
|
max_train_timeout=max_train_timeout,
|
28
27
|
cutoff_dt=cutoff_dt,
|
29
|
-
max_features=max_features,
|
30
28
|
)
|
@@ -48,6 +48,10 @@ class CatboostModel(Model):
|
|
48
48
|
def name(cls) -> str:
|
49
49
|
return "catboost"
|
50
50
|
|
51
|
+
@classmethod
|
52
|
+
def supports_x(cls, df: pd.DataFrame) -> bool:
|
53
|
+
return True
|
54
|
+
|
51
55
|
def __init__(self) -> None:
|
52
56
|
super().__init__()
|
53
57
|
self._catboost = None
|
@@ -86,7 +90,9 @@ class CatboostModel(Model):
|
|
86
90
|
"sample_weight": w,
|
87
91
|
}
|
88
92
|
|
89
|
-
def set_options(
|
93
|
+
def set_options(
|
94
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
95
|
+
) -> None:
|
90
96
|
self._iterations = trial.suggest_int(_ITERATIONS_KEY, 100, 10000)
|
91
97
|
self._learning_rate = trial.suggest_float(_LEARNING_RATE_KEY, 0.001, 0.3)
|
92
98
|
self._depth = trial.suggest_int(_DEPTH_KEY, 1, 10)
|
@@ -20,6 +20,11 @@ class Model(Params, Fit):
|
|
20
20
|
"""The name of the model."""
|
21
21
|
raise NotImplementedError("name not implemented in parent class.")
|
22
22
|
|
23
|
+
@classmethod
|
24
|
+
def supports_x(cls, df: pd.DataFrame) -> bool:
|
25
|
+
"""Whether the model supports the X values."""
|
26
|
+
raise NotImplementedError("supports_x not implemented in parent class.")
|
27
|
+
|
23
28
|
@property
|
24
29
|
def estimator(self) -> Any:
|
25
30
|
"""The estimator backing the model."""
|
@@ -34,6 +34,10 @@ class ModelRouter(Model):
|
|
34
34
|
def name(cls) -> str:
|
35
35
|
return "router"
|
36
36
|
|
37
|
+
@classmethod
|
38
|
+
def supports_x(cls, df: pd.DataFrame) -> bool:
|
39
|
+
return True
|
40
|
+
|
37
41
|
@property
|
38
42
|
def estimator(self) -> Any:
|
39
43
|
model = self._model
|
@@ -61,9 +65,15 @@ class ModelRouter(Model):
|
|
61
65
|
raise ValueError("model is null")
|
62
66
|
return model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y, w=w)
|
63
67
|
|
64
|
-
def set_options(
|
65
|
-
|
66
|
-
|
68
|
+
def set_options(
|
69
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
70
|
+
) -> None:
|
71
|
+
model = _MODELS[
|
72
|
+
trial.suggest_categorical(
|
73
|
+
"model", [k for k, v in _MODELS.items() if v.supports_x(df)]
|
74
|
+
)
|
75
|
+
]()
|
76
|
+
model.set_options(trial, df)
|
67
77
|
self._model = model
|
68
78
|
|
69
79
|
def load(self, folder: str) -> None:
|
@@ -31,6 +31,10 @@ class TabPFNModel(Model):
|
|
31
31
|
def name(cls) -> str:
|
32
32
|
return "tabpfn"
|
33
33
|
|
34
|
+
@classmethod
|
35
|
+
def supports_x(cls, df: pd.DataFrame) -> bool:
|
36
|
+
return len(df.columns.values) < 500
|
37
|
+
|
34
38
|
def __init__(self) -> None:
|
35
39
|
super().__init__()
|
36
40
|
self._tabpfn = None
|
@@ -57,7 +61,9 @@ class TabPFNModel(Model):
|
|
57
61
|
self._model_type = determine_model_type(y)
|
58
62
|
return {}
|
59
63
|
|
60
|
-
def set_options(
|
64
|
+
def set_options(
|
65
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
66
|
+
) -> None:
|
61
67
|
pass
|
62
68
|
|
63
69
|
def load(self, folder: str) -> None:
|
@@ -1,12 +1,15 @@
|
|
1
1
|
"""A class for loading/saving parameters."""
|
2
2
|
|
3
3
|
import optuna
|
4
|
+
import pandas as pd
|
4
5
|
|
5
6
|
|
6
7
|
class Params:
|
7
8
|
"""The params prototype class."""
|
8
9
|
|
9
|
-
def set_options(
|
10
|
+
def set_options(
|
11
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
12
|
+
) -> None:
|
10
13
|
"""Set the options used in the object."""
|
11
14
|
raise NotImplementedError("set_options not implemented in parent class.")
|
12
15
|
|
@@ -32,7 +32,9 @@ class BaseSelectorReducer(Reducer):
|
|
32
32
|
"""Whether the class should raise its exception if it encounters it."""
|
33
33
|
return True
|
34
34
|
|
35
|
-
def set_options(
|
35
|
+
def set_options(
|
36
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
37
|
+
) -> None:
|
36
38
|
pass
|
37
39
|
|
38
40
|
def load(self, folder: str) -> None:
|
@@ -12,8 +12,8 @@ from .constant_reducer import ConstantReducer
|
|
12
12
|
from .correlation_reducer import CorrelationReducer
|
13
13
|
from .duplicate_reducer import DuplicateReducer
|
14
14
|
from .nonnumeric_reducer import NonNumericReducer
|
15
|
-
from .pca_reducer import PCAReducer
|
16
15
|
from .reducer import Reducer
|
16
|
+
from .smart_correlation_reducer import SmartCorrelationReducer
|
17
17
|
from .unseen_reducer import UnseenReducer
|
18
18
|
|
19
19
|
_COMBINED_REDUCER_FILE = "combined_reducer.json"
|
@@ -25,25 +25,26 @@ class CombinedReducer(Reducer):
|
|
25
25
|
|
26
26
|
# pylint: disable=too-many-positional-arguments,too-many-arguments
|
27
27
|
|
28
|
-
def __init__(self, max_features: int | None):
|
28
|
+
def __init__(self):
|
29
29
|
super().__init__()
|
30
|
-
self._max_features = max_features
|
31
30
|
self._reducers = [
|
32
31
|
UnseenReducer(),
|
33
32
|
NonNumericReducer(),
|
34
33
|
ConstantReducer(),
|
35
34
|
DuplicateReducer(),
|
36
35
|
CorrelationReducer(),
|
37
|
-
|
36
|
+
SmartCorrelationReducer(),
|
38
37
|
]
|
39
38
|
|
40
39
|
@classmethod
|
41
40
|
def name(cls) -> str:
|
42
41
|
return "combined"
|
43
42
|
|
44
|
-
def set_options(
|
43
|
+
def set_options(
|
44
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
45
|
+
) -> None:
|
45
46
|
for reducer in self._reducers:
|
46
|
-
reducer.set_options(trial)
|
47
|
+
reducer.set_options(trial, df)
|
47
48
|
|
48
49
|
def load(self, folder: str) -> None:
|
49
50
|
self._reducers = []
|
@@ -62,8 +63,8 @@ class CombinedReducer(Reducer):
|
|
62
63
|
self._reducers.append(NonNumericReducer())
|
63
64
|
elif reducer_name == UnseenReducer.name():
|
64
65
|
self._reducers.append(UnseenReducer())
|
65
|
-
elif reducer_name == PCAReducer.name():
|
66
|
-
self._reducers.append(PCAReducer(self._max_features))
|
66
|
+
elif reducer_name == SmartCorrelationReducer.name():
|
67
|
+
self._reducers.append(SmartCorrelationReducer())
|
67
68
|
for reducer in self._reducers:
|
68
69
|
reducer.load(folder)
|
69
70
|
|
@@ -1,18 +1,22 @@
|
|
1
1
|
"""A reducer that removes correlation features."""
|
2
2
|
|
3
|
+
import optuna
|
4
|
+
import pandas as pd
|
3
5
|
from feature_engine.selection import DropCorrelatedFeatures
|
4
6
|
|
5
7
|
from .base_selector_reducer import BaseSelectorReducer
|
6
8
|
|
7
9
|
_CORRELATION_REDUCER_FILENAME = "correlation_reducer.joblib"
|
10
|
+
_CORRELATION_REDUCER_THRESHOLD = "correlation_reducer_threshold"
|
8
11
|
|
9
12
|
|
10
13
|
class CorrelationReducer(BaseSelectorReducer):
|
11
14
|
"""A class that removes correlated values from a dataset."""
|
12
15
|
|
13
16
|
def __init__(self) -> None:
|
17
|
+
self._correlation_selector = DropCorrelatedFeatures(missing_values="ignore")
|
14
18
|
super().__init__(
|
15
|
-
|
19
|
+
self._correlation_selector,
|
16
20
|
_CORRELATION_REDUCER_FILENAME,
|
17
21
|
)
|
18
22
|
|
@@ -23,3 +27,10 @@ class CorrelationReducer(BaseSelectorReducer):
|
|
23
27
|
@classmethod
|
24
28
|
def should_raise(cls) -> bool:
|
25
29
|
return False
|
30
|
+
|
31
|
+
def set_options(
|
32
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
33
|
+
) -> None:
|
34
|
+
self._correlation_selector.threshold = trial.suggest_float(
|
35
|
+
_CORRELATION_REDUCER_THRESHOLD, 0.1, 0.9
|
36
|
+
)
|
@@ -17,7 +17,9 @@ class NonNumericReducer(Reducer):
|
|
17
17
|
def name(cls) -> str:
|
18
18
|
return "nonnumeric"
|
19
19
|
|
20
|
-
def set_options(
|
20
|
+
def set_options(
|
21
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
22
|
+
) -> None:
|
21
23
|
pass
|
22
24
|
|
23
25
|
def load(self, folder: str) -> None:
|
@@ -0,0 +1,32 @@
|
|
1
|
+
"""A reducer that removes correlation features via further heuristics."""
|
2
|
+
|
3
|
+
import optuna
|
4
|
+
import pandas as pd
|
5
|
+
from feature_engine.selection import SmartCorrelatedSelection
|
6
|
+
|
7
|
+
from .base_selector_reducer import BaseSelectorReducer
|
8
|
+
|
9
|
+
_SMART_CORRELATION_REDUCER_FILENAME = "smart_correlation_reducer.joblib"
|
10
|
+
_SMART_CORRELATION_REDUCER_THRESHOLD = "smart_correlation_reducer_threshold"
|
11
|
+
|
12
|
+
|
13
|
+
class SmartCorrelationReducer(BaseSelectorReducer):
|
14
|
+
"""A class that removes smart correlated values from a dataset."""
|
15
|
+
|
16
|
+
def __init__(self) -> None:
|
17
|
+
self._correlation_selector = SmartCorrelatedSelection(missing_values="ignore")
|
18
|
+
super().__init__(
|
19
|
+
self._correlation_selector,
|
20
|
+
_SMART_CORRELATION_REDUCER_FILENAME,
|
21
|
+
)
|
22
|
+
|
23
|
+
@classmethod
|
24
|
+
def name(cls) -> str:
|
25
|
+
return "smart_correlation"
|
26
|
+
|
27
|
+
def set_options(
|
28
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
29
|
+
) -> None:
|
30
|
+
self._correlation_selector.threshold = trial.suggest_float(
|
31
|
+
_SMART_CORRELATION_REDUCER_THRESHOLD, 0.1, 0.9
|
32
|
+
)
|
@@ -25,7 +25,9 @@ class UnseenReducer(Reducer):
|
|
25
25
|
def name(cls) -> str:
|
26
26
|
return "unseen"
|
27
27
|
|
28
|
-
def set_options(
|
28
|
+
def set_options(
|
29
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
30
|
+
) -> None:
|
29
31
|
pass
|
30
32
|
|
31
33
|
def load(self, folder: str) -> None:
|
@@ -31,7 +31,9 @@ class Selector(Params, Fit):
|
|
31
31
|
self._steps = 0
|
32
32
|
self._selector = None
|
33
33
|
|
34
|
-
def set_options(
|
34
|
+
def set_options(
|
35
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
36
|
+
) -> None:
|
35
37
|
self._feature_ratio = trial.suggest_float("feature_ratio", 0.0, 1.0)
|
36
38
|
self._steps = trial.suggest_int("steps", 1, 10)
|
37
39
|
|
@@ -28,6 +28,7 @@ from .windower.windower import Windower
|
|
28
28
|
_SAMPLER_FILENAME = "sampler.pkl"
|
29
29
|
_STUDYDB_FILENAME = "study.db"
|
30
30
|
_PARAMS_FILENAME = "params.json"
|
31
|
+
_TRIAL_FILENAME = "trial.json"
|
31
32
|
_TRIALS_KEY = "trials"
|
32
33
|
_WALKFORWARD_TIMEDELTA_KEY = "walkforward_timedelta"
|
33
34
|
_DAYS_KEY = "days"
|
@@ -36,7 +37,6 @@ _TEST_SIZE_KEY = "test_size"
|
|
36
37
|
_VALIDATION_SIZE_KEY = "validation_size"
|
37
38
|
_IDX_USR_ATTR_KEY = "idx"
|
38
39
|
_DT_COLUMN_KEY = "dt_column"
|
39
|
-
_MAX_FEATURES_KEY = "max_features"
|
40
40
|
|
41
41
|
|
42
42
|
class Trainer(Fit):
|
@@ -54,7 +54,6 @@ class Trainer(Fit):
|
|
54
54
|
dt_column: str | None = None,
|
55
55
|
max_train_timeout: datetime.timedelta | None = None,
|
56
56
|
cutoff_dt: datetime.datetime | None = None,
|
57
|
-
max_features: int | None = None,
|
58
57
|
):
|
59
58
|
tqdm.tqdm.pandas()
|
60
59
|
|
@@ -105,7 +104,6 @@ class Trainer(Fit):
|
|
105
104
|
)
|
106
105
|
if dt_column is None:
|
107
106
|
dt_column = params[_DT_COLUMN_KEY]
|
108
|
-
max_features = params.get(_MAX_FEATURES_KEY)
|
109
107
|
else:
|
110
108
|
with open(params_file, "w", encoding="utf8") as handle:
|
111
109
|
validation_size_value = None
|
@@ -136,7 +134,6 @@ class Trainer(Fit):
|
|
136
134
|
_TEST_SIZE_KEY: test_size_value,
|
137
135
|
_VALIDATION_SIZE_KEY: validation_size_value,
|
138
136
|
_DT_COLUMN_KEY: dt_column,
|
139
|
-
_MAX_FEATURES_KEY: max_features,
|
140
137
|
},
|
141
138
|
handle,
|
142
139
|
)
|
@@ -147,7 +144,6 @@ class Trainer(Fit):
|
|
147
144
|
self._dt_column = dt_column
|
148
145
|
self._max_train_timeout = max_train_timeout
|
149
146
|
self._cutoff_dt = cutoff_dt
|
150
|
-
self._max_features = max_features
|
151
147
|
|
152
148
|
def _provide_study(self, column: str) -> optuna.Study:
|
153
149
|
storage_name = f"sqlite:///{self._folder}/{column}/{_STUDYDB_FILENAME}"
|
@@ -203,6 +199,20 @@ class Trainer(Fit):
|
|
203
199
|
) -> float:
|
204
200
|
print(f"Beginning trial for: {split_idx.isoformat()}")
|
205
201
|
trial.set_user_attr(_IDX_USR_ATTR_KEY, split_idx.isoformat())
|
202
|
+
folder = os.path.join(
|
203
|
+
self._folder, str(y_series.name), split_idx.isoformat()
|
204
|
+
)
|
205
|
+
os.makedirs(folder, exist_ok=True)
|
206
|
+
trial_file = os.path.join(folder, _TRIAL_FILENAME)
|
207
|
+
if os.path.exists(trial_file):
|
208
|
+
with open(trial_file, encoding="utf8") as handle:
|
209
|
+
trial_info = json.load(handle)
|
210
|
+
if trial_info["number"] == trial.number:
|
211
|
+
logging.info(
|
212
|
+
"Found trial %d previously executed, skipping...",
|
213
|
+
trial.number,
|
214
|
+
)
|
215
|
+
return trial_info["output"]
|
206
216
|
|
207
217
|
train_dt_index = dt_index[: len(x)]
|
208
218
|
x_train = x[train_dt_index < split_idx] # type: ignore
|
@@ -213,7 +223,7 @@ class Trainer(Fit):
|
|
213
223
|
try:
|
214
224
|
# Window the data
|
215
225
|
windower = Windower(self._dt_column)
|
216
|
-
windower.set_options(trial)
|
226
|
+
windower.set_options(trial, x)
|
217
227
|
x_train = windower.fit_transform(x_train)
|
218
228
|
y_train = y_train[-len(x_train) :]
|
219
229
|
if len(y_train.unique()) <= 1:
|
@@ -221,25 +231,25 @@ class Trainer(Fit):
|
|
221
231
|
return -1.0
|
222
232
|
|
223
233
|
# Perform common reductions
|
224
|
-
reducer = CombinedReducer(self._max_features)
|
225
|
-
reducer.set_options(trial)
|
234
|
+
reducer = CombinedReducer()
|
235
|
+
reducer.set_options(trial, x)
|
226
236
|
x_train = reducer.fit_transform(x_train)
|
227
237
|
x_test = reducer.transform(x_test)
|
228
238
|
|
229
239
|
# Calculate the row weights
|
230
240
|
weights = CombinedWeights()
|
231
|
-
weights.set_options(trial)
|
241
|
+
weights.set_options(trial, x)
|
232
242
|
w = weights.fit(x_train, y=y_train).transform(y_train.to_frame())[
|
233
243
|
WEIGHTS_COLUMN
|
234
244
|
]
|
235
245
|
|
236
246
|
# Create model
|
237
247
|
model = ModelRouter()
|
238
|
-
model.set_options(trial)
|
248
|
+
model.set_options(trial, x)
|
239
249
|
|
240
250
|
# Train
|
241
251
|
selector = Selector(model)
|
242
|
-
selector.set_options(trial)
|
252
|
+
selector.set_options(trial, x)
|
243
253
|
selector.fit(x_train, y=y_train, w=w, eval_x=x_test, eval_y=y_test)
|
244
254
|
x_train = selector.transform(x_train)
|
245
255
|
x_test = selector.transform(x_test)
|
@@ -249,27 +259,35 @@ class Trainer(Fit):
|
|
249
259
|
|
250
260
|
# Calibrate
|
251
261
|
calibrator = CalibratorRouter(model)
|
252
|
-
calibrator.set_options(trial)
|
262
|
+
calibrator.set_options(trial, x)
|
253
263
|
calibrator.fit(x_pred, y=y_train)
|
254
264
|
|
265
|
+
# Output
|
266
|
+
y_pred = model.transform(x_test)
|
267
|
+
y_pred = calibrator.transform(y_pred)
|
268
|
+
output = 0.0
|
269
|
+
if determine_model_type(y_series) == ModelType.REGRESSION:
|
270
|
+
output = float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
|
271
|
+
else:
|
272
|
+
output = float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
|
273
|
+
|
255
274
|
if save:
|
256
|
-
folder = os.path.join(
|
257
|
-
self._folder, str(y_series.name), split_idx.isoformat()
|
258
|
-
)
|
259
|
-
if not os.path.exists(folder):
|
260
|
-
os.mkdir(folder)
|
261
275
|
windower.save(folder, trial)
|
262
276
|
reducer.save(folder, trial)
|
263
277
|
weights.save(folder, trial)
|
264
278
|
model.save(folder, trial)
|
265
279
|
selector.save(folder, trial)
|
266
280
|
calibrator.save(folder, trial)
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
281
|
+
with open(trial_file, "w", encoding="utf8") as handle:
|
282
|
+
json.dump(
|
283
|
+
{
|
284
|
+
"number": trial.number,
|
285
|
+
"output": output,
|
286
|
+
},
|
287
|
+
handle,
|
288
|
+
)
|
289
|
+
|
290
|
+
return output
|
273
291
|
except WavetrainException as exc:
|
274
292
|
logging.warning(str(exc))
|
275
293
|
return -1.0
|
@@ -431,7 +449,7 @@ class Trainer(Fit):
|
|
431
449
|
date_str = dates[-1].isoformat()
|
432
450
|
folder = os.path.join(column_path, date_str)
|
433
451
|
|
434
|
-
reducer = CombinedReducer(self._max_features)
|
452
|
+
reducer = CombinedReducer()
|
435
453
|
reducer.load(folder)
|
436
454
|
|
437
455
|
model = ModelRouter()
|
@@ -27,7 +27,9 @@ class ClassWeights(Weights):
|
|
27
27
|
"""The name of the weight class."""
|
28
28
|
return "class"
|
29
29
|
|
30
|
-
def set_options(
|
30
|
+
def set_options(
|
31
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
32
|
+
) -> None:
|
31
33
|
pass
|
32
34
|
|
33
35
|
def load(self, folder: str) -> None:
|
@@ -23,9 +23,11 @@ class CombinedWeights(Weights):
|
|
23
23
|
def name(cls) -> str:
|
24
24
|
return "combined"
|
25
25
|
|
26
|
-
def set_options(
|
26
|
+
def set_options(
|
27
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
28
|
+
) -> None:
|
27
29
|
for weights in self._weights:
|
28
|
-
weights.set_options(trial)
|
30
|
+
weights.set_options(trial, df)
|
29
31
|
|
30
32
|
def load(self, folder: str) -> None:
|
31
33
|
for weights in self._weights:
|
@@ -19,7 +19,9 @@ class ExponentialWeights(Weights):
|
|
19
19
|
"""The name of the weight class."""
|
20
20
|
return "exponential"
|
21
21
|
|
22
|
-
def set_options(
|
22
|
+
def set_options(
|
23
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
24
|
+
) -> None:
|
23
25
|
pass
|
24
26
|
|
25
27
|
def load(self, folder: str) -> None:
|
@@ -19,7 +19,9 @@ class LinearWeights(Weights):
|
|
19
19
|
"""The name of the weight class."""
|
20
20
|
return "linear"
|
21
21
|
|
22
|
-
def set_options(
|
22
|
+
def set_options(
|
23
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
24
|
+
) -> None:
|
23
25
|
pass
|
24
26
|
|
25
27
|
def load(self, folder: str) -> None:
|
@@ -19,7 +19,9 @@ class NoopWeights(Weights):
|
|
19
19
|
"""The name of the weight class."""
|
20
20
|
return "noop"
|
21
21
|
|
22
|
-
def set_options(
|
22
|
+
def set_options(
|
23
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
24
|
+
) -> None:
|
23
25
|
pass
|
24
26
|
|
25
27
|
def load(self, folder: str) -> None:
|
@@ -20,7 +20,9 @@ class SigmoidWeights(Weights):
|
|
20
20
|
"""The name of the weight class."""
|
21
21
|
return "sigmoid"
|
22
22
|
|
23
|
-
def set_options(
|
23
|
+
def set_options(
|
24
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
25
|
+
) -> None:
|
24
26
|
pass
|
25
27
|
|
26
28
|
def load(self, folder: str) -> None:
|
@@ -38,7 +38,9 @@ class WeightsRouter(Weights):
|
|
38
38
|
def name(cls) -> str:
|
39
39
|
return "router"
|
40
40
|
|
41
|
-
def set_options(
|
41
|
+
def set_options(
|
42
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
43
|
+
) -> None:
|
42
44
|
self._weights = _WEIGHTS[
|
43
45
|
trial.suggest_categorical("weights", list(_WEIGHTS.keys()))
|
44
46
|
]()
|
@@ -28,7 +28,9 @@ class Windower(Params, Fit):
|
|
28
28
|
self._lookback_ratio = None
|
29
29
|
self._dt_column = dt_column
|
30
30
|
|
31
|
-
def set_options(
|
31
|
+
def set_options(
|
32
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
33
|
+
) -> None:
|
32
34
|
self._lookback_ratio = trial.suggest_float("lookback", 0.1, 1.0)
|
33
35
|
|
34
36
|
def load(self, folder: str) -> None:
|
@@ -40,8 +40,8 @@ wavetrainer/reducer/constant_reducer.py
|
|
40
40
|
wavetrainer/reducer/correlation_reducer.py
|
41
41
|
wavetrainer/reducer/duplicate_reducer.py
|
42
42
|
wavetrainer/reducer/nonnumeric_reducer.py
|
43
|
-
wavetrainer/reducer/pca_reducer.py
|
44
43
|
wavetrainer/reducer/reducer.py
|
44
|
+
wavetrainer/reducer/smart_correlation_reducer.py
|
45
45
|
wavetrainer/reducer/unseen_reducer.py
|
46
46
|
wavetrainer/selector/__init__.py
|
47
47
|
wavetrainer/selector/selector.py
|
@@ -1,77 +0,0 @@
|
|
1
|
-
"""A reducer that removes low variance columns."""
|
2
|
-
|
3
|
-
import os
|
4
|
-
from typing import Self
|
5
|
-
|
6
|
-
import joblib # type: ignore
|
7
|
-
import optuna
|
8
|
-
import pandas as pd
|
9
|
-
from sklearn.decomposition import PCA # type: ignore
|
10
|
-
from sklearn.preprocessing import StandardScaler # type: ignore
|
11
|
-
|
12
|
-
from .reducer import Reducer
|
13
|
-
|
14
|
-
_PCA_FILE = "pca.joblib"
|
15
|
-
_PCA_SCALER_FILE = "pca_scaler.joblib"
|
16
|
-
|
17
|
-
|
18
|
-
class PCAReducer(Reducer):
|
19
|
-
"""A class that removes low variance columns from a dataframe."""
|
20
|
-
|
21
|
-
# pylint: disable=too-many-positional-arguments,too-many-arguments
|
22
|
-
|
23
|
-
def __init__(self, max_features: int | None):
|
24
|
-
super().__init__()
|
25
|
-
self._max_features = max_features
|
26
|
-
if max_features is not None:
|
27
|
-
self._scaler = StandardScaler()
|
28
|
-
self._pca = PCA(n_components=max_features)
|
29
|
-
else:
|
30
|
-
self._scaler = None
|
31
|
-
self._pca = None
|
32
|
-
|
33
|
-
@classmethod
|
34
|
-
def name(cls) -> str:
|
35
|
-
return "pca"
|
36
|
-
|
37
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
38
|
-
pass
|
39
|
-
|
40
|
-
def load(self, folder: str) -> None:
|
41
|
-
pca_scaler_file = os.path.join(folder, _PCA_SCALER_FILE)
|
42
|
-
pca_file = os.path.join(folder, _PCA_FILE)
|
43
|
-
if os.path.exists(pca_scaler_file):
|
44
|
-
self._scaler = joblib.load(pca_scaler_file)
|
45
|
-
if os.path.exists(pca_file):
|
46
|
-
self._pca = joblib.load(pca_file)
|
47
|
-
|
48
|
-
def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
49
|
-
if self._scaler is not None:
|
50
|
-
joblib.dump(self._scaler, os.path.join(folder, _PCA_SCALER_FILE))
|
51
|
-
if self._pca is not None:
|
52
|
-
joblib.dump(self._pca, os.path.join(folder, _PCA_FILE))
|
53
|
-
|
54
|
-
def fit(
|
55
|
-
self,
|
56
|
-
df: pd.DataFrame,
|
57
|
-
y: pd.Series | pd.DataFrame | None = None,
|
58
|
-
w: pd.Series | None = None,
|
59
|
-
eval_x: pd.DataFrame | None = None,
|
60
|
-
eval_y: pd.Series | pd.DataFrame | None = None,
|
61
|
-
) -> Self:
|
62
|
-
pca = self._pca
|
63
|
-
scaler = self._scaler
|
64
|
-
if pca is None or scaler is None:
|
65
|
-
return self
|
66
|
-
if len(df.columns.values) < pca.n_components: # type: ignore
|
67
|
-
return self
|
68
|
-
x_scaled = scaler.fit_transform(df)
|
69
|
-
pca.fit(x_scaled)
|
70
|
-
return self
|
71
|
-
|
72
|
-
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
73
|
-
if self._pca is None:
|
74
|
-
return df
|
75
|
-
if len(df.columns.values) < self._pca.n_components: # type: ignore
|
76
|
-
return df
|
77
|
-
return self._pca.transform(df)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|