wavetrainer 0.0.42__tar.gz → 0.0.44__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.0.42/wavetrainer.egg-info → wavetrainer-0.0.44}/PKG-INFO +1 -1
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/setup.py +1 -1
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/tests/trainer_test.py +1 -1
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/__init__.py +1 -1
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/calibrator/calibrator_router.py +3 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/calibrator/mapie_calibrator.py +9 -6
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_kwargs.py +10 -7
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_model.py +11 -4
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/model/model.py +8 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/model/model_router.py +12 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/model/tabpfn/tabpfn_model.py +6 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/model/xgboost/xgboost_model.py +21 -4
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/reducer/base_selector_reducer.py +0 -3
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/reducer/combined_reducer.py +3 -2
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/reducer/correlation_reducer.py +1 -1
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/reducer/smart_correlation_reducer.py +6 -1
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/selector/selector.py +8 -2
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/trainer.py +10 -6
- {wavetrainer-0.0.42 → wavetrainer-0.0.44/wavetrainer.egg-info}/PKG-INFO +1 -1
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/LICENSE +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/MANIFEST.in +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/README.md +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/requirements.txt +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/setup.cfg +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/tests/__init__.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/tests/model/__init__.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/tests/model/catboost_kwargs_test.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/create.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/model/catboost/__init__.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_classifier_wrap.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_regressor_wrap.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/model/tabpfn/__init__.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/model/xgboost/__init__.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/model/xgboost/early_stopper.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/model/xgboost/xgboost_logger.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/model_type.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/params.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/reducer/unseen_reducer.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/weights/class_weights.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/weights/combined_weights.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/weights/exponential_weights.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/weights/linear_weights.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/weights/noop_weights.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/weights/sigmoid_weights.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/weights/weights_router.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer/windower/windower.py +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer.egg-info/SOURCES.txt +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer.egg-info/requires.txt +0 -0
- {wavetrainer-0.0.42 → wavetrainer-0.0.44}/wavetrainer.egg-info/top_level.txt +0 -0
```diff
--- wavetrainer-0.0.42/setup.py
+++ wavetrainer-0.0.44/setup.py
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
 
 setup(
     name='wavetrainer',
-    version='0.0.42',
+    version='0.0.44',
     description='A library for automatically finding the optimal model within feature and hyperparameter space.',
     long_description=long_description,
     long_description_content_type='text/markdown',
```
```diff
--- wavetrainer-0.0.42/tests/trainer_test.py
+++ wavetrainer-0.0.44/tests/trainer_test.py
@@ -40,7 +40,7 @@ class TestTrainer(unittest.TestCase):
 
     def test_trainer_dt_column(self):
         with tempfile.TemporaryDirectory() as tmpdir:
-            trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=5)
+            trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=5, dt_column="dt_column")
             x_data = [i for i in range(100)]
             x_index = [datetime.datetime(2022, 1, 1) + datetime.timedelta(days=i) for i in range(len(x_data))]
             df = pd.DataFrame(
```
```diff
--- wavetrainer-0.0.42/wavetrainer/calibrator/calibrator_router.py
+++ wavetrainer-0.0.44/wavetrainer/calibrator/calibrator_router.py
@@ -37,6 +37,9 @@ class CalibratorRouter(Calibrator):
         return "router"
 
     def predictions_as_x(self, y: pd.Series | pd.DataFrame | None = None) -> bool:
+        calibrator = self._calibrator
+        if calibrator is not None:
+            return calibrator.predictions_as_x(None)
         if y is None:
             raise ValueError("y is null")
         if determine_model_type(y) == ModelType.REGRESSION:
```
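The router change above short-circuits to the routed calibrator once one exists, so `y` is no longer needed after routing. A minimal sketch of that delegation, using hypothetical stand-in classes (`RouterLike` and `VennAbersLike` are illustrative, not wavetrainer API):

```python
# Hypothetical stand-ins for the router and a routed calibrator.
class VennAbersLike:
    def predictions_as_x(self, y=None) -> bool:
        # A classification calibrator consumes model predictions.
        return True

class RouterLike:
    def __init__(self) -> None:
        self._calibrator = None

    def predictions_as_x(self, y=None) -> bool:
        # New behavior: defer to the routed calibrator when one is set.
        calibrator = self._calibrator
        if calibrator is not None:
            return calibrator.predictions_as_x(None)
        if y is None:
            raise ValueError("y is null")
        return False  # pre-routing fallback based on y (elided here)

router = RouterLike()
router._calibrator = VennAbersLike()
print(router.predictions_as_x())  # True, no y required once routed
```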
```diff
--- wavetrainer-0.0.42/wavetrainer/calibrator/mapie_calibrator.py
+++ wavetrainer-0.0.44/wavetrainer/calibrator/mapie_calibrator.py
@@ -21,7 +21,7 @@ class MAPIECalibrator(Calibrator):
 
     def __init__(self, model: Model):
         super().__init__(model)
-        self._mapie = MapieRegressor(model.create_estimator(), method="plus")
+        self._mapie = MapieRegressor(model.create_estimator(), method="plus", cv=5)
 
     @classmethod
     def name(cls) -> str:
```
```diff
@@ -54,19 +54,22 @@ class MAPIECalibrator(Calibrator):
             raise ValueError("mapie is null")
         if y is None:
             raise ValueError("y is null")
-
-        return self
-        mapie.fit(df.to_numpy(), y.to_numpy())
+        mapie.fit(self._model.convert_df(df), y)
         return self
 
     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
         alpha = []
         for potential_alpha in [0.05, 0.32]:
-            if len(df) > int(1.0 / potential_alpha) + 1:
+            if (
+                len(df) > int(1.0 / potential_alpha) + 1
+                and len(df) > int(1.0 / (1.0 - potential_alpha)) + 1
+            ):
                 alpha.append(potential_alpha)
         ret_df = pd.DataFrame(index=df.index)
         if alpha:
-            _, y_pis = self._mapie.predict(df.to_numpy(), alpha=alpha, allow_infinite_bounds=True)
+            _, y_pis = self._mapie.predict(
+                self._model.convert_df(df), alpha=alpha, allow_infinite_bounds=True
+            )
             for i in range(y_pis.shape[1]):
                 if i >= len(alpha):
                     continue
```
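For context, a minimal sketch of the `MapieRegressor` flow this calibrator wraps, assuming mapie's pre-1.0 API; the `cv=5` argument and the interval-array shape are the parts the hunks above depend on:

```python
import numpy as np
from mapie.regression import MapieRegressor
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
X = rng.random((100, 3))
y = X @ np.array([1.0, 2.0, 3.0]) + rng.random(100)

# cv=5 (added in this release) makes the "plus" method use 5 explicit folds.
mapie = MapieRegressor(LinearRegression(), method="plus", cv=5)
mapie.fit(X, y)

# y_pis has shape (n_samples, 2, n_alpha): lower/upper bound per alpha,
# which is what transform() iterates over above.
y_pred, y_pis = mapie.predict(X, alpha=[0.05, 0.32])
print(y_pis.shape)  # (100, 2, 2)
```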
```diff
--- wavetrainer-0.0.42/wavetrainer/model/catboost/catboost_kwargs.py
+++ wavetrainer-0.0.44/wavetrainer/model/catboost/catboost_kwargs.py
@@ -3,6 +3,7 @@
 from typing import Any
 
 import numpy as np
+import pandas as pd
 from catboost import Pool  # type: ignore
 
 ORIGINAL_X_ARG_KEY = "original_x"
@@ -12,12 +13,17 @@ CAT_FEATURES_ARG_KEY = "cat_features"
 
 def handle_fit_kwargs(*args, **kwargs) -> tuple[tuple[Any, ...], dict[str, Any]]:
     """Handles keyword args coming into a catboost fit method."""
+    args_list = list(args)
+    fit_x = args_list[0]
+
+    cat_features = kwargs.get(CAT_FEATURES_ARG_KEY)
+    if cat_features is None and isinstance(fit_x, pd.DataFrame):
+        cat_features = fit_x.select_dtypes(include="category").columns.tolist()
+    kwargs[CAT_FEATURES_ARG_KEY] = cat_features
+
     if ORIGINAL_X_ARG_KEY in kwargs:
         df = kwargs[ORIGINAL_X_ARG_KEY]
         eval_x, eval_y = kwargs[EVAL_SET_ARG_KEY]
-        cat_features = kwargs[CAT_FEATURES_ARG_KEY]
-        args_list = list(args)
-        fit_x = args_list[0]
         fix_x_cp = fit_x.copy()
 
         # Stupid code to ensure eval is feature equivalent to train data
@@ -32,9 +38,6 @@ def handle_fit_kwargs(*args, **kwargs) -> tuple[tuple[Any, ...], dict[str, Any]]:
                 included_columns.append(col)
                 break
         # We also need to update cat_features or catboost will yell at us
-        cat_features = list(
-            set(list(kwargs.get(CAT_FEATURES_ARG_KEY, []))) & set(included_columns)
-        )
         args_list[0] = df[included_columns]
         args = tuple(args_list)
 
@@ -45,7 +48,7 @@ def handle_fit_kwargs(*args, **kwargs) -> tuple[tuple[Any, ...], dict[str, Any]]:
             label=eval_y,
             cat_features=cat_features,
         )
-        kwargs[CAT_FEATURES_ARG_KEY] = cat_features
 
         del kwargs[ORIGINAL_X_ARG_KEY]
+
     return args, kwargs
```
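The hunks above make `cat_features` optional by inferring it from pandas `"category"` dtypes before CatBoost's fit is called. A small self-contained sketch of that detection (data and names are illustrative):

```python
import pandas as pd
from catboost import CatBoostClassifier

df = pd.DataFrame(
    {
        "color": pd.Series(["red", "blue", "red", "blue"], dtype="category"),
        "size": [1.0, 2.0, 3.0, 4.0],
    }
)
y = [0, 1, 0, 1]

# Same detection as handle_fit_kwargs: category-dtype columns become cat_features.
cat_features = df.select_dtypes(include="category").columns.tolist()
print(cat_features)  # ['color']

model = CatBoostClassifier(iterations=10, verbose=0)
model.fit(df, y, cat_features=cat_features)
```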
```diff
--- wavetrainer-0.0.42/wavetrainer/model/catboost/catboost_model.py
+++ wavetrainer-0.0.44/wavetrainer/model/catboost/catboost_model.py
@@ -2,12 +2,12 @@
 
 # pylint: disable=line-too-long
 import json
-import logging
 import os
 from typing import Self
 
 import optuna
 import pandas as pd
+import pytest_is_running
 import torch
 from catboost import CatBoost, Pool  # type: ignore
 
@@ -83,12 +83,21 @@ class CatboostModel(Model):
     def create_estimator(self):
         return self._create_catboost()
 
+    def reset(self):
+        self._catboost = None
+        self._best_iteration = None
+
+    def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
+        return df
+
     def set_options(
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
     ) -> None:
         self._iterations = trial.suggest_int(_ITERATIONS_KEY, 100, 10000)
         self._learning_rate = trial.suggest_float(_LEARNING_RATE_KEY, 0.001, 0.3)
-        self._depth = trial.suggest_int(_DEPTH_KEY, 1, 6)
+        self._depth = trial.suggest_int(
+            _DEPTH_KEY, 1, 2 if pytest_is_running.is_running() else 6
+        )
         self._l2_leaf_reg = trial.suggest_float(_L2_LEAF_REG_KEY, 3.0, 50.0)
         self._boosting_type = trial.suggest_categorical(
             _BOOSTING_TYPE_KEY, ["Ordered", "Plain"]
@@ -170,8 +179,6 @@ class CatboostModel(Model):
             metric_period=100,
             eval_set=eval_pool,
         )
-        importances = catboost.get_feature_importance(prettified=True)
-        logging.info("Importances:\n%s", importances)
         self._best_iteration = catboost.get_best_iteration()
         return self
 
```
```diff
--- wavetrainer-0.0.42/wavetrainer/model/model.py
+++ wavetrainer-0.0.44/wavetrainer/model/model.py
@@ -46,3 +46,11 @@ class Model(Params, Fit):
     def create_estimator(self) -> Any:
         """Creates a new estimator."""
        raise NotImplementedError("creates_estimator not implemented in parent class.")
+
+    def reset(self) -> None:
+        """Resets a model."""
+        raise NotImplementedError("reset not implemented in parent class.")
+
+    def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Converts a dataframe for use with a model."""
+        raise NotImplementedError("convert_df not implemented in parent class.")
```
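A minimal sketch of a subclass satisfying the two new hooks; `IdentityModel` is a hypothetical example, and the other abstract `Model` methods are omitted for brevity:

```python
import pandas as pd

class IdentityModel(Model):  # other abstract methods elided
    def reset(self) -> None:
        # Nothing cached between fits, so nothing to clear.
        pass

    def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
        # Accept frames unchanged, as TabPFNModel does below.
        return df
```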
```diff
--- wavetrainer-0.0.42/wavetrainer/model/model_router.py
+++ wavetrainer-0.0.44/wavetrainer/model/model_router.py
@@ -66,6 +66,18 @@ class ModelRouter(Model):
             raise ValueError("model is null")
         return model.create_estimator()
 
+    def reset(self):
+        model = self._model
+        if model is None:
+            raise ValueError("model is null")
+        model.reset()
+
+    def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
+        model = self._model
+        if model is None:
+            raise ValueError("model is null")
+        return model.convert_df(df)
+
     def set_options(
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
     ) -> None:
```
```diff
--- wavetrainer-0.0.42/wavetrainer/model/tabpfn/tabpfn_model.py
+++ wavetrainer-0.0.44/wavetrainer/model/tabpfn/tabpfn_model.py
@@ -56,6 +56,12 @@ class TabPFNModel(Model):
     def create_estimator(self):
         return self._create_tabpfn()
 
+    def reset(self):
+        pass
+
+    def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
+        return df
+
     def set_options(
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
     ) -> None:
```
```diff
--- wavetrainer-0.0.42/wavetrainer/model/xgboost/xgboost_model.py
+++ wavetrainer-0.0.44/wavetrainer/model/xgboost/xgboost_model.py
@@ -7,6 +7,7 @@ from typing import Self
 
 import optuna
 import pandas as pd
+import pytest_is_running
 import torch
 from xgboost import XGBClassifier, XGBRegressor
 from xgboost.callback import TrainingCallback
```
```diff
@@ -107,7 +108,11 @@ class XGBoostModel(Model):
     @property
     def feature_importances(self) -> dict[str, float]:
         bst = self._provide_xgboost()
-        return bst.get_booster().get_score(importance_type="weight")  # type: ignore
+        try:
+            return bst.get_booster().get_score(importance_type="weight")  # type: ignore
+        except XGBoostError as exc:
+            print(str(exc))
+            return {}
 
     def provide_estimator(self):
         return self._provide_xgboost()
```
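A hedged reproduction of why the guard helps: asking an unfitted estimator for its booster scores raises an error (depending on the XGBoost version, `XGBoostError` or sklearn's `NotFittedError`), and the property now degrades to an empty dict instead of propagating it:

```python
from sklearn.exceptions import NotFittedError
from xgboost import XGBClassifier
from xgboost.core import XGBoostError

clf = XGBClassifier()
try:
    # Fails before fit(): there is no booster to score yet.
    scores = clf.get_booster().get_score(importance_type="weight")
except (XGBoostError, NotFittedError) as exc:
    scores = {}
    print(str(exc))
print(scores)  # {}
```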
```diff
@@ -115,6 +120,13 @@ class XGBoostModel(Model):
     def create_estimator(self):
         return self._create_xgboost()
 
+    def reset(self):
+        self._xgboost = None
+        self._best_iteration = None
+
+    def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
+        return _convert_categoricals(df)
+
     def set_options(
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
     ) -> None:
```
```diff
@@ -126,7 +138,9 @@ class XGBoostModel(Model):
         self._subsample = trial.suggest_float(_SUBSAMPLE_KEY, 0.2, 1.0)
         self._colsample_bytree = trial.suggest_float(_COLSAMPLE_BYTREE_KEY, 0.2, 1.0)
         if self._booster in ["gbtree", "dart"]:
-            self._max_depth = trial.suggest_int(_MAX_DEPTH_KEY, 3, 9)
+            self._max_depth = trial.suggest_int(
+                _MAX_DEPTH_KEY, 3, 4 if pytest_is_running.is_running() else 9
+            )
             self._min_child_weight = trial.suggest_int(
                 _MIN_CHILD_WEIGHT_KEY, 2, 10, log=True
             )
```
```diff
@@ -144,7 +158,9 @@ class XGBoostModel(Model):
         )
         self._rate_drop = trial.suggest_float(_RATE_DROP_KEY, 1e-8, 1.0, log=True)
         self._skip_drop = trial.suggest_float(_SKIP_DROP_KEY, 1e-8, 1.0, log=True)
-        self._num_boost_rounds = trial.suggest_int(_NUM_BOOST_ROUNDS_KEY, 100, 10000)
+        self._num_boost_rounds = trial.suggest_int(
+            _NUM_BOOST_ROUNDS_KEY, 100, 110 if pytest_is_running.is_running() else 10000
+        )
         self._early_stopping_rounds = trial.suggest_int(
             _EARLY_STOPPING_ROUNDS_KEY, 50, 500
         )
```
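The hyperparameter hunks above (and the CatBoost depth change earlier) all use the same trick: `pytest_is_running.is_running()` returns True only inside a pytest session, so the Optuna search space collapses to a cheap range under test. A sketch (the objective is illustrative):

```python
import optuna
import pytest_is_running

def objective(trial: optuna.Trial) -> float:
    # Full range in production, tiny range when running under pytest.
    max_depth = trial.suggest_int(
        "max_depth", 3, 4 if pytest_is_running.is_running() else 9
    )
    return float(max_depth)

study = optuna.create_study()
study.optimize(objective, n_trials=2)
print(study.best_value)
```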
```diff
@@ -275,7 +291,8 @@ class XGBoostModel(Model):
         )
         param = {
             "objective": "binary:logistic",
-            "tree_method": "
+            "tree_method": "hist" if torch.cuda.is_available() else "exact",
+            "device": "cuda" if torch.cuda.is_available() else "cpu",
             # defines booster, gblinear for linear functions.
             "booster": self._booster,
             # L2 regularization weight.
```
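The `tree_method`/`device` pair follows XGBoost 2.x conventions, where the device is selected via the `device` parameter and `"hist"` is the GPU-capable method. A hedged sketch with the sklearn wrapper:

```python
import torch
from xgboost import XGBClassifier

use_cuda = torch.cuda.is_available()
clf = XGBClassifier(
    tree_method="hist" if use_cuda else "exact",
    device="cuda" if use_cuda else "cpu",
)
```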
```diff
--- wavetrainer-0.0.42/wavetrainer/reducer/base_selector_reducer.py
+++ wavetrainer-0.0.44/wavetrainer/reducer/base_selector_reducer.py
@@ -7,7 +7,6 @@ import joblib  # type: ignore
 import optuna
 import pandas as pd
 from feature_engine.selection.base_selector import BaseSelector
-from sklearn.utils.validation import check_is_fitted  # type: ignore
 
 from .reducer import Reducer
 
```
```diff
@@ -60,6 +59,4 @@ class BaseSelectorReducer(Reducer):
     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
         if len(df.columns) <= 1:
             return df
-        if not check_is_fitted(self._base_selector):
-            return df
         return self._base_selector.transform(df)
```
```diff
--- wavetrainer-0.0.42/wavetrainer/reducer/combined_reducer.py
+++ wavetrainer-0.0.44/wavetrainer/reducer/combined_reducer.py
@@ -1,5 +1,6 @@
 """A reducer that combines all the other reducers."""
 
+# pylint: disable=line-too-long
 import json
 import logging
 import os
```
```diff
@@ -107,8 +108,8 @@ class CombinedReducer(Reducer):
         removed_columns = before_columns.difference(after_columns)
         if removed_columns:
             removed_columns_dict[reducer.name()] = list(removed_columns)
-
-
+            print(
+                f"{reducer.name()} reducer took {time.time() - start_reducer} and removed {len(removed_columns)} features",
             )
         if self._folder is not None:
             with open(
```
```diff
--- wavetrainer-0.0.42/wavetrainer/reducer/correlation_reducer.py
+++ wavetrainer-0.0.44/wavetrainer/reducer/correlation_reducer.py
@@ -37,7 +37,7 @@ class CorrelationReducer(BaseSelectorReducer):
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
     ) -> None:
         self._correlation_selector.threshold = trial.suggest_float(
-            _CORRELATION_REDUCER_THRESHOLD, 0.
+            _CORRELATION_REDUCER_THRESHOLD, 0.7, 0.99
         )
 
     def fit(
```
```diff
--- wavetrainer-0.0.42/wavetrainer/reducer/smart_correlation_reducer.py
+++ wavetrainer-0.0.44/wavetrainer/reducer/smart_correlation_reducer.py
@@ -33,7 +33,7 @@ class SmartCorrelationReducer(BaseSelectorReducer):
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
     ) -> None:
         self._correlation_selector.threshold = trial.suggest_float(
-            _SMART_CORRELATION_REDUCER_THRESHOLD, 0.
+            _SMART_CORRELATION_REDUCER_THRESHOLD, 0.7, 0.99
         )
 
     def fit(
```
```diff
@@ -48,3 +48,8 @@ class SmartCorrelationReducer(BaseSelectorReducer):
         if len(self._correlation_selector.variables) <= 1:
             return self
         return super().fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y)
+
+    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
+        if len(find_non_categorical_numeric_columns(df)) <= 1:
+            return df
+        return super().transform(df)
```
```diff
--- wavetrainer-0.0.42/wavetrainer/selector/selector.py
+++ wavetrainer-0.0.44/wavetrainer/selector/selector.py
@@ -57,7 +57,8 @@ class Selector(Params, Fit):
             return self
         if not isinstance(y, pd.Series):
             raise ValueError("y is not a series.")
-        if len(df.columns) <= 1:
+        total_columns = len(df.columns)
+        if total_columns <= 1:
             return self
         print(
             f"Performing feature selection with {self._steps} steps and a total ratio of {self._feature_ratio}"
```
```diff
@@ -81,6 +82,9 @@ class Selector(Params, Fit):
         if not current_features:
             current_features = [list(feature_importances.keys())[0]]
         current_features = current_features[:required_features]
+        print(
+            f"Current Features:\n{pd.Series(data=list(feature_importances.values()), index=list(feature_importances.keys()))}\n"
+        )
 
         n_features = len(current_features)
         for i in range(self._steps):
```
```diff
@@ -90,16 +94,18 @@ class Selector(Params, Fit):
             ratio_diff = 1.0 - self._feature_ratio
             ratio_step = ratio_diff / float(self._steps)
             current_ratio = 1.0 - (ratio_step * i)
-            n_features = max(1, int(len(df.columns) * current_ratio))
+            n_features = max(1, int(total_columns * current_ratio))
             if n_features >= len(current_features):
                 continue
 
+            self._model.reset()
             self._model.fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y)
             set_current_features(n_features)
             print(f"Reduced features to {len(current_features)}")
             df = df[current_features]
             if eval_x is not None:
                 eval_x = eval_x[current_features]
+        print(f"Final feature count: {len(current_features)}")
 
         self._selector = current_features
 
```
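A worked example of the schedule above with hypothetical numbers: `total_columns` replaces the old per-iteration `len(df.columns)`, so the target count is computed against the original width even as `df` shrinks each step:

```python
# Hypothetical inputs; exact binary fractions chosen so int() is stable.
total_columns = 100
feature_ratio = 0.5
steps = 2

ratio_step = (1.0 - feature_ratio) / float(steps)  # 0.25
for i in range(steps):
    current_ratio = 1.0 - (ratio_step * i)
    n_features = max(1, int(total_columns * current_ratio))
    print(i, n_features)  # prints "0 100" then "1 75"
```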
```diff
--- wavetrainer-0.0.42/wavetrainer/trainer.py
+++ wavetrainer-0.0.44/wavetrainer/trainer.py
@@ -212,7 +212,7 @@ class Trainer(Fit):
             folder = os.path.join(
                 self._folder, str(y_series.name), split_idx.isoformat()
             )
-            new_folder = os.path.exists(folder)
+            new_folder = not os.path.exists(folder)
             os.makedirs(folder, exist_ok=True)
             trial_file = os.path.join(folder, _TRIAL_FILENAME)
             if os.path.exists(trial_file):
```
```diff
@@ -281,13 +281,14 @@ class Trainer(Fit):
             x_test = selector.transform(x_test)
             print(f"Selection took {time.time() - start_selector}")
             start_train = time.time()
-
+            model.fit(
                 x_train,
                 y=y_train,
                 w=w,
                 eval_x=x_test if not no_evaluation else None,
                 eval_y=y_test if not no_evaluation else None,
             )
+            y_pred = model.transform(x_test)
             print(f"Training took {time.time() - start_train}")
 
             # Calibrate
```
```diff
@@ -295,13 +296,12 @@ class Trainer(Fit):
             calibrator = CalibratorRouter(model)
             calibrator.set_options(trial, x)
             calibrator.fit(
-                x_test,
-                y=y_test,
+                y_pred if calibrator.predictions_as_x(y_test) else x_test,
+                y=y_test,
             )
             print(f"Calibrating took {time.time() - start_calibrate}")
 
             # Output
-            y_pred = model.transform(x_test)
             cal_pred = calibrator.transform(
                 y_pred if calibrator.predictions_as_x(y_test) else x_test
             )
```
```diff
@@ -441,6 +441,8 @@ class Trainer(Fit):
                 if self._max_train_timeout is None
                 else self._max_train_timeout.total_seconds(),
             )
+            else:
+                break
 
         _fit(study.best_trial, test_df, test_series, True, test_idx, True)
         last_processed_dt = test_idx
```
```diff
@@ -515,7 +517,9 @@ class Trainer(Fit):
             x_pred = reducer.transform(group[feature_columns])
             x_pred = selector.transform(x_pred)
             y_pred = model.transform(x_pred)
-            y_pred = calibrator.transform(x_pred)
+            y_pred = calibrator.transform(
+                y_pred if calibrator.predictions_as_x(None) else x_pred
+            )
             for new_column in y_pred.columns.values:
                 group["_".join([column, new_column])] = y_pred[new_column]
             return group
```