wavetrainer 0.0.4__tar.gz → 0.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.0.4/wavetrainer.egg-info → wavetrainer-0.0.5}/PKG-INFO +1 -1
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/setup.py +1 -1
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/tests/trainer_test.py +1 -1
- wavetrainer-0.0.5/wavetrainer/__init__.py +6 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/calibrator/mapie_calibrator.py +13 -8
- wavetrainer-0.0.5/wavetrainer/model/catboost_model.py +187 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/model/model.py +6 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/model/model_router.py +7 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/model_type.py +6 -6
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/selector/selector.py +19 -13
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/trainer.py +11 -5
- {wavetrainer-0.0.4 → wavetrainer-0.0.5/wavetrainer.egg-info}/PKG-INFO +1 -1
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer.egg-info/SOURCES.txt +0 -1
- wavetrainer-0.0.4/wavetrainer/__init__.py +0 -10
- wavetrainer-0.0.4/wavetrainer/load.py +0 -8
- wavetrainer-0.0.4/wavetrainer/model/catboost_model.py +0 -80
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/LICENSE +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/MANIFEST.in +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/README.md +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/requirements.txt +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/setup.cfg +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/tests/__init__.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/calibrator/calibrator_router.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/create.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/params.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/reducer/base_selector_reducer.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/reducer/combined_reducer.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/reducer/correlation_reducer.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/weights/class_weights.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/weights/combined_weights.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/weights/exponential_weights.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/weights/linear_weights.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/weights/noop_weights.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/weights/sigmoid_weights.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/weights/weights_router.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer/windower/windower.py +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer.egg-info/requires.txt +0 -0
- {wavetrainer-0.0.4 → wavetrainer-0.0.5}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
|
|
23
23
|
|
24
24
|
setup(
|
25
25
|
name='wavetrainer',
|
26
|
-
version='0.0.4',
|
26
|
+
version='0.0.5',
|
27
27
|
description='A library for automatically finding the optimal model within feature and hyperparameter space.',
|
28
28
|
long_description=long_description,
|
29
29
|
long_description_content_type='text/markdown',
|
@@ -13,7 +13,7 @@ class TestTrainer(unittest.TestCase):
|
|
13
13
|
|
14
14
|
def test_trainer(self):
|
15
15
|
with tempfile.TemporaryDirectory() as tmpdir:
|
16
|
-
trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=
|
16
|
+
trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=1)
|
17
17
|
x_data = [i for i in range(100)]
|
18
18
|
x_index = [datetime.datetime(2022, 1, 1) + datetime.timedelta(days=i) for i in range(len(x_data))]
|
19
19
|
df = pd.DataFrame(
|
@@ -49,12 +49,17 @@ class MAPIECalibrator(Calibrator):
|
|
49
49
|
return self
|
50
50
|
|
51
51
|
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
52
|
-
alpha = [
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
)
|
52
|
+
alpha = []
|
53
|
+
for potential_alpha in [0.05, 0.32]:
|
54
|
+
if len(df) > int(1.0 / potential_alpha):
|
55
|
+
alpha.append(potential_alpha)
|
56
|
+
if alpha:
|
57
|
+
_, y_pis = self._mapie.predict(df, alpha=alpha)
|
58
|
+
for i in range(y_pis.shape[1]):
|
59
|
+
if i >= len(alpha):
|
60
|
+
continue
|
61
|
+
for ii in range(y_pis.shape[2]):
|
62
|
+
alpha_val = alpha[i]
|
63
|
+
values = y_pis[:, i, ii].flatten().tolist()
|
64
|
+
df[f"{PROBABILITY_COLUMN_PREFIX}{alpha_val}_{ii == 1}"] = values
|
60
65
|
return df
|
@@ -0,0 +1,187 @@
|
|
1
|
+
"""A model that wraps catboost."""
|
2
|
+
|
3
|
+
import json
|
4
|
+
import os
|
5
|
+
from typing import Any, Self
|
6
|
+
|
7
|
+
import optuna
|
8
|
+
import pandas as pd
|
9
|
+
from catboost import CatBoostClassifier # type: ignore
|
10
|
+
from catboost import CatBoost, CatBoostRegressor, Pool
|
11
|
+
|
12
|
+
from ..model_type import ModelType, determine_model_type
|
13
|
+
from .model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
|
14
|
+
|
15
|
+
_MODEL_FILENAME = "model.cbm"
|
16
|
+
_MODEL_PARAMS_FILENAME = "model_params.json"
|
17
|
+
_ITERATIONS_KEY = "iterations"
|
18
|
+
_LEARNING_RATE_KEY = "learning_rate"
|
19
|
+
_DEPTH_KEY = "depth"
|
20
|
+
_L2_LEAF_REG_KEY = "l2_leaf_reg"
|
21
|
+
_BOOSTING_TYPE_KEY = "boosting_type"
|
22
|
+
_MODEL_TYPE_KEY = "model_type"
|
23
|
+
|
24
|
+
|
25
|
+
class CatboostModel(Model):
|
26
|
+
"""A class that uses Catboost as a model."""
|
27
|
+
|
28
|
+
_catboost: CatBoost | None
|
29
|
+
_iterations: None | int
|
30
|
+
_learning_rate: None | float
|
31
|
+
_depth: None | int
|
32
|
+
_l2_leaf_reg: None | float
|
33
|
+
_boosting_type: None | str
|
34
|
+
_model_type: None | ModelType
|
35
|
+
|
36
|
+
@classmethod
|
37
|
+
def name(cls) -> str:
|
38
|
+
return "catboost"
|
39
|
+
|
40
|
+
def __init__(self) -> None:
|
41
|
+
super().__init__()
|
42
|
+
self._catboost = None
|
43
|
+
self._iterations = None
|
44
|
+
self._learning_rate = None
|
45
|
+
self._depth = None
|
46
|
+
self._l2_leaf_reg = None
|
47
|
+
self._boosting_type = None
|
48
|
+
self._model_type = None
|
49
|
+
|
50
|
+
@property
|
51
|
+
def estimator(self) -> Any:
|
52
|
+
return self._provide_catboost()
|
53
|
+
|
54
|
+
def pre_fit(self, y: pd.Series | pd.DataFrame | None):
|
55
|
+
if y is None:
|
56
|
+
raise ValueError("y is null.")
|
57
|
+
self._model_type = determine_model_type(y)
|
58
|
+
|
59
|
+
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
60
|
+
self._iterations = trial.suggest_int(_ITERATIONS_KEY, 100, 10000)
|
61
|
+
self._learning_rate = trial.suggest_float(_LEARNING_RATE_KEY, 0.001, 0.3)
|
62
|
+
self._depth = trial.suggest_int(_DEPTH_KEY, 1, 12)
|
63
|
+
self._l2_leaf_reg = trial.suggest_float(_L2_LEAF_REG_KEY, 3.0, 50.0)
|
64
|
+
self._boosting_type = trial.suggest_categorical(
|
65
|
+
_BOOSTING_TYPE_KEY, ["Ordered", "Plain"]
|
66
|
+
)
|
67
|
+
|
68
|
+
def load(self, folder: str) -> None:
|
69
|
+
with open(
|
70
|
+
os.path.join(folder, _MODEL_PARAMS_FILENAME), encoding="utf8"
|
71
|
+
) as handle:
|
72
|
+
params = json.load(handle)
|
73
|
+
self._iterations = params[_ITERATIONS_KEY]
|
74
|
+
self._learning_rate = params[_LEARNING_RATE_KEY]
|
75
|
+
self._depth = params[_DEPTH_KEY]
|
76
|
+
self._l2_leaf_reg = params[_L2_LEAF_REG_KEY]
|
77
|
+
self._boosting_type = params[_BOOSTING_TYPE_KEY]
|
78
|
+
self._model_type = ModelType(params[_MODEL_TYPE_KEY])
|
79
|
+
catboost = self._provide_catboost()
|
80
|
+
catboost.load_model(os.path.join(folder, _MODEL_FILENAME))
|
81
|
+
|
82
|
+
def save(self, folder: str) -> None:
|
83
|
+
with open(
|
84
|
+
os.path.join(folder, _MODEL_PARAMS_FILENAME), "w", encoding="utf8"
|
85
|
+
) as handle:
|
86
|
+
json.dump(
|
87
|
+
{
|
88
|
+
_ITERATIONS_KEY: self._iterations,
|
89
|
+
_LEARNING_RATE_KEY: self._learning_rate,
|
90
|
+
_DEPTH_KEY: self._depth,
|
91
|
+
_L2_LEAF_REG_KEY: self._l2_leaf_reg,
|
92
|
+
_BOOSTING_TYPE_KEY: self._boosting_type,
|
93
|
+
_MODEL_TYPE_KEY: str(self._model_type),
|
94
|
+
},
|
95
|
+
handle,
|
96
|
+
)
|
97
|
+
catboost = self._provide_catboost()
|
98
|
+
catboost.save_model(os.path.join(folder, _MODEL_FILENAME))
|
99
|
+
|
100
|
+
def fit(
|
101
|
+
self,
|
102
|
+
df: pd.DataFrame,
|
103
|
+
y: pd.Series | pd.DataFrame | None = None,
|
104
|
+
w: pd.Series | None = None,
|
105
|
+
) -> Self:
|
106
|
+
if y is None:
|
107
|
+
raise ValueError("y is null.")
|
108
|
+
self._model_type = determine_model_type(y)
|
109
|
+
catboost = self._provide_catboost()
|
110
|
+
|
111
|
+
train_pool = Pool(
|
112
|
+
df,
|
113
|
+
label=y,
|
114
|
+
weight=w,
|
115
|
+
)
|
116
|
+
catboost.fit(
|
117
|
+
train_pool,
|
118
|
+
early_stopping_rounds=100,
|
119
|
+
verbose=False,
|
120
|
+
metric_period=100,
|
121
|
+
)
|
122
|
+
return self
|
123
|
+
|
124
|
+
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
125
|
+
pred_pool = Pool(df)
|
126
|
+
catboost = self._provide_catboost()
|
127
|
+
pred = catboost.predict(pred_pool)
|
128
|
+
df = pd.DataFrame(
|
129
|
+
index=df.index,
|
130
|
+
data={
|
131
|
+
PREDICTION_COLUMN: pred.flatten(),
|
132
|
+
},
|
133
|
+
)
|
134
|
+
if self._model_type != ModelType.REGRESSION:
|
135
|
+
proba = catboost.predict_proba(pred_pool) # type: ignore
|
136
|
+
for i in range(proba.shape[1]):
|
137
|
+
df[f"{PROBABILITY_COLUMN_PREFIX}{i}"] = proba[:, i]
|
138
|
+
return df
|
139
|
+
|
140
|
+
def _provide_catboost(self) -> CatBoost:
|
141
|
+
catboost = self._catboost
|
142
|
+
if catboost is None:
|
143
|
+
match self._model_type:
|
144
|
+
case ModelType.BINARY:
|
145
|
+
catboost = CatBoostClassifier(
|
146
|
+
iterations=self._iterations,
|
147
|
+
learning_rate=self._learning_rate,
|
148
|
+
depth=self._depth,
|
149
|
+
l2_leaf_reg=self._l2_leaf_reg,
|
150
|
+
boosting_type=self._boosting_type,
|
151
|
+
early_stopping_rounds=100,
|
152
|
+
metric_period=100,
|
153
|
+
)
|
154
|
+
case ModelType.REGRESSION:
|
155
|
+
catboost = CatBoostRegressor(
|
156
|
+
iterations=self._iterations,
|
157
|
+
learning_rate=self._learning_rate,
|
158
|
+
depth=self._depth,
|
159
|
+
l2_leaf_reg=self._l2_leaf_reg,
|
160
|
+
boosting_type=self._boosting_type,
|
161
|
+
early_stopping_rounds=100,
|
162
|
+
metric_period=100,
|
163
|
+
)
|
164
|
+
case ModelType.BINNED_BINARY:
|
165
|
+
catboost = CatBoostClassifier(
|
166
|
+
iterations=self._iterations,
|
167
|
+
learning_rate=self._learning_rate,
|
168
|
+
depth=self._depth,
|
169
|
+
l2_leaf_reg=self._l2_leaf_reg,
|
170
|
+
boosting_type=self._boosting_type,
|
171
|
+
early_stopping_rounds=100,
|
172
|
+
metric_period=100,
|
173
|
+
)
|
174
|
+
case ModelType.MULTI_CLASSIFICATION:
|
175
|
+
catboost = CatBoostClassifier(
|
176
|
+
iterations=self._iterations,
|
177
|
+
learning_rate=self._learning_rate,
|
178
|
+
depth=self._depth,
|
179
|
+
l2_leaf_reg=self._l2_leaf_reg,
|
180
|
+
boosting_type=self._boosting_type,
|
181
|
+
early_stopping_rounds=100,
|
182
|
+
metric_period=100,
|
183
|
+
)
|
184
|
+
self._catboost = catboost
|
185
|
+
if catboost is None:
|
186
|
+
raise ValueError("catboost is null")
|
187
|
+
return catboost
|
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
from typing import Any
|
4
4
|
|
5
|
+
import pandas as pd
|
6
|
+
|
5
7
|
from ..fit import Fit
|
6
8
|
from ..params import Params
|
7
9
|
|
@@ -21,3 +23,7 @@ class Model(Params, Fit):
|
|
21
23
|
def estimator(self) -> Any:
|
22
24
|
"""The estimator backing the model."""
|
23
25
|
raise NotImplementedError("estimator not implemented in parent class.")
|
26
|
+
|
27
|
+
def pre_fit(self, y: pd.Series | pd.DataFrame | None) -> None:
|
28
|
+
"""A call to make sure the model is prepared for the target type."""
|
29
|
+
raise NotImplementedError("pre_fit not implemented in parent class.")
|
@@ -37,10 +37,17 @@ class ModelRouter(Model):
|
|
37
37
|
raise ValueError("model is null")
|
38
38
|
return model.estimator
|
39
39
|
|
40
|
+
def pre_fit(self, y: pd.Series | pd.DataFrame | None):
|
41
|
+
model = self._model
|
42
|
+
if model is None:
|
43
|
+
raise ValueError("model is null")
|
44
|
+
model.pre_fit(y)
|
45
|
+
|
40
46
|
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
41
47
|
self._model = _MODELS[
|
42
48
|
trial.suggest_categorical("model", list(_MODELS.keys()))
|
43
49
|
]()
|
50
|
+
self._model.set_options(trial)
|
44
51
|
|
45
52
|
def load(self, folder: str) -> None:
|
46
53
|
with open(os.path.join(folder, _MODEL_ROUTER_FILE), encoding="utf8") as handle:
|
@@ -1,17 +1,17 @@
|
|
1
1
|
"""An enum to define the model type."""
|
2
2
|
|
3
|
-
from enum import
|
3
|
+
from enum import StrEnum, auto
|
4
4
|
|
5
5
|
import pandas as pd
|
6
6
|
|
7
7
|
|
8
|
-
class ModelType(
|
8
|
+
class ModelType(StrEnum):
|
9
9
|
"""The type of model being run."""
|
10
10
|
|
11
|
-
BINARY =
|
12
|
-
REGRESSION =
|
13
|
-
BINNED_BINARY =
|
14
|
-
MULTI_CLASSIFICATION =
|
11
|
+
BINARY = auto()
|
12
|
+
REGRESSION = auto()
|
13
|
+
BINNED_BINARY = auto()
|
14
|
+
MULTI_CLASSIFICATION = auto()
|
15
15
|
|
16
16
|
|
17
17
|
def determine_model_type(y: pd.Series | pd.DataFrame) -> ModelType:
|
@@ -19,24 +19,18 @@ _SELECTOR_FILE = "selector.joblib"
|
|
19
19
|
class Selector(Params, Fit):
|
20
20
|
"""The selector class."""
|
21
21
|
|
22
|
-
|
22
|
+
_selector: RFE | None
|
23
|
+
|
24
|
+
def __init__(self, model: Model):
|
23
25
|
super().__init__()
|
24
26
|
self._model = model
|
25
27
|
self._feature_ratio = 0.0
|
26
28
|
self._steps = 0
|
27
|
-
|
28
|
-
self._selector = RFE(
|
29
|
-
model.estimator,
|
30
|
-
n_features_to_select=n_features_to_select,
|
31
|
-
step=self._steps,
|
32
|
-
verbose=1,
|
33
|
-
)
|
29
|
+
self._selector = None
|
34
30
|
|
35
31
|
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
36
32
|
self._feature_ratio = trial.suggest_float("feature_ratio", 0.0, 1.0)
|
37
|
-
|
38
|
-
self._steps = steps
|
39
|
-
self._selector.step = steps
|
33
|
+
self._steps = trial.suggest_int("steps", 1, 16)
|
40
34
|
|
41
35
|
def load(self, folder: str) -> None:
|
42
36
|
self._selector = joblib.load(os.path.join(folder, _SELECTOR_FILE))
|
@@ -50,9 +44,18 @@ class Selector(Params, Fit):
|
|
50
44
|
y: pd.Series | pd.DataFrame | None = None,
|
51
45
|
w: pd.Series | None = None,
|
52
46
|
) -> Self:
|
47
|
+
self._model.pre_fit(y)
|
53
48
|
if not isinstance(y, pd.Series):
|
54
49
|
raise ValueError("y is not a series.")
|
55
|
-
|
50
|
+
n_features_to_select = max(1, int(len(df.columns) * self._feature_ratio))
|
51
|
+
self._selector = RFE(
|
52
|
+
self._model.estimator,
|
53
|
+
n_features_to_select=n_features_to_select,
|
54
|
+
step=max(
|
55
|
+
1,
|
56
|
+
int((len(df.columns) - n_features_to_select) / self._steps),
|
57
|
+
),
|
58
|
+
)
|
56
59
|
try:
|
57
60
|
self._selector.fit(df, y=y, sample_weight=w)
|
58
61
|
except ValueError as exc:
|
@@ -61,8 +64,11 @@ class Selector(Params, Fit):
|
|
61
64
|
return self
|
62
65
|
|
63
66
|
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
67
|
+
selector = self._selector
|
68
|
+
if selector is None:
|
69
|
+
raise ValueError("selector is null.")
|
64
70
|
try:
|
65
|
-
return df[
|
71
|
+
return df[selector.get_feature_names_out()]
|
66
72
|
except AttributeError as exc:
|
67
73
|
# Catch issues with 1 feature as a reduction target.
|
68
74
|
logging.warning(str(exc))
|
@@ -11,7 +11,7 @@ from typing import Self
|
|
11
11
|
import optuna
|
12
12
|
import pandas as pd
|
13
13
|
import tqdm
|
14
|
-
from sklearn.metrics import
|
14
|
+
from sklearn.metrics import f1_score, mean_absolute_error # type: ignore
|
15
15
|
|
16
16
|
from .calibrator.calibrator_router import CalibratorRouter
|
17
17
|
from .exceptions import WavetrainException
|
@@ -215,7 +215,7 @@ class Trainer(Fit):
|
|
215
215
|
model.set_options(trial)
|
216
216
|
|
217
217
|
# Train
|
218
|
-
selector = Selector(model
|
218
|
+
selector = Selector(model)
|
219
219
|
selector.set_options(trial)
|
220
220
|
selector.fit(x_train, y=y_train, w=w)
|
221
221
|
x_train = selector.transform(x_train)
|
@@ -243,7 +243,7 @@ class Trainer(Fit):
|
|
243
243
|
y_pred = model.transform(x_test)
|
244
244
|
y_pred = calibrator.transform(y_pred)
|
245
245
|
if determine_model_type(y_series) == ModelType.REGRESSION:
|
246
|
-
return
|
246
|
+
return mean_absolute_error(y_test, y_pred[[PREDICTION_COLUMN]])
|
247
247
|
return f1_score(y_test, y_pred[[PREDICTION_COLUMN]])
|
248
248
|
except WavetrainException as exc:
|
249
249
|
logging.warning(str(exc))
|
@@ -286,9 +286,15 @@ class Trainer(Fit):
|
|
286
286
|
train_len = len(df[dt_index < start_test_index])
|
287
287
|
test_len = len(df.loc[start_test_index:start_validation_index])
|
288
288
|
|
289
|
+
last_processed_dt = None
|
289
290
|
for count, test_idx in tqdm.tqdm(
|
290
|
-
enumerate(
|
291
|
+
enumerate(test_dt_index[test_dt_index >= start_test_index])
|
291
292
|
):
|
293
|
+
if (
|
294
|
+
last_processed_dt is not None
|
295
|
+
and test_idx < last_processed_dt + self._walkforward_timedelta
|
296
|
+
):
|
297
|
+
continue
|
292
298
|
test_dt = test_idx.to_pydatetime()
|
293
299
|
found = False
|
294
300
|
for trial in study.trials:
|
@@ -373,7 +379,7 @@ class Trainer(Fit):
|
|
373
379
|
model = ModelRouter()
|
374
380
|
model.load(folder)
|
375
381
|
|
376
|
-
selector = Selector(model
|
382
|
+
selector = Selector(model)
|
377
383
|
selector.load(folder)
|
378
384
|
|
379
385
|
calibrator = CalibratorRouter(model)
|
@@ -1,80 +0,0 @@
|
|
1
|
-
"""A model that wraps catboost."""
|
2
|
-
|
3
|
-
import os
|
4
|
-
from typing import Any, Self
|
5
|
-
|
6
|
-
import optuna
|
7
|
-
import pandas as pd
|
8
|
-
from catboost import CatBoostClassifier, Pool # type: ignore
|
9
|
-
|
10
|
-
from .model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
|
11
|
-
|
12
|
-
_MODEL_FILENAME = "model.cbm"
|
13
|
-
|
14
|
-
|
15
|
-
class CatboostModel(Model):
|
16
|
-
"""A class that uses Catboost as a model."""
|
17
|
-
|
18
|
-
@classmethod
|
19
|
-
def name(cls) -> str:
|
20
|
-
return "catboost"
|
21
|
-
|
22
|
-
def __init__(self) -> None:
|
23
|
-
super().__init__()
|
24
|
-
self._catboost = CatBoostClassifier()
|
25
|
-
|
26
|
-
@property
|
27
|
-
def estimator(self) -> Any:
|
28
|
-
return self._catboost
|
29
|
-
|
30
|
-
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
31
|
-
iterations = trial.suggest_int("iterations", 100, 10000)
|
32
|
-
learning_rate = trial.suggest_float("learning_rate", 0.001, 0.3)
|
33
|
-
depth = trial.suggest_int("depth", 1, 12)
|
34
|
-
l2_leaf_reg = trial.suggest_float("l2_leaf_reg", 3.0, 50.0)
|
35
|
-
boosting_type = trial.suggest_categorical("boosting_type", ["Ordered", "Plain"])
|
36
|
-
self._catboost.set_params(
|
37
|
-
iterations=iterations,
|
38
|
-
learning_rate=learning_rate,
|
39
|
-
depth=depth,
|
40
|
-
l2_leaf_reg=l2_leaf_reg,
|
41
|
-
boosting_type=boosting_type,
|
42
|
-
early_stopping_rounds=100,
|
43
|
-
)
|
44
|
-
|
45
|
-
def load(self, folder: str) -> None:
|
46
|
-
self._catboost.load_model(os.path.join(folder, _MODEL_FILENAME))
|
47
|
-
|
48
|
-
def save(self, folder: str) -> None:
|
49
|
-
self._catboost.save_model(os.path.join(folder, _MODEL_FILENAME))
|
50
|
-
|
51
|
-
def fit(
|
52
|
-
self,
|
53
|
-
df: pd.DataFrame,
|
54
|
-
y: pd.Series | pd.DataFrame | None = None,
|
55
|
-
w: pd.Series | None = None,
|
56
|
-
) -> Self:
|
57
|
-
train_pool = Pool(
|
58
|
-
df,
|
59
|
-
label=y,
|
60
|
-
weight=w,
|
61
|
-
)
|
62
|
-
self._catboost.fit(
|
63
|
-
train_pool,
|
64
|
-
early_stopping_rounds=100,
|
65
|
-
)
|
66
|
-
return self
|
67
|
-
|
68
|
-
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
69
|
-
pred_pool = Pool(df)
|
70
|
-
pred = self._catboost.predict(pred_pool)
|
71
|
-
proba = self._catboost.predict_proba(pred_pool)
|
72
|
-
df = pd.DataFrame(
|
73
|
-
index=df.index,
|
74
|
-
data={
|
75
|
-
PREDICTION_COLUMN: pred.flatten(),
|
76
|
-
},
|
77
|
-
)
|
78
|
-
for i in range(proba.shape[1]):
|
79
|
-
df[f"{PROBABILITY_COLUMN_PREFIX}{i}"] = proba[:, i]
|
80
|
-
return df
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|