wavetrainer 0.0.43__tar.gz → 0.0.44__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.0.43/wavetrainer.egg-info → wavetrainer-0.0.44}/PKG-INFO +1 -1
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/setup.py +1 -1
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/tests/trainer_test.py +1 -1
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/__init__.py +1 -1
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/calibrator/calibrator_router.py +3 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/calibrator/mapie_calibrator.py +9 -6
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_kwargs.py +10 -7
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_model.py +11 -4
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/model.py +8 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/model_router.py +12 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/tabpfn/tabpfn_model.py +6 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/xgboost/xgboost_model.py +16 -3
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/base_selector_reducer.py +0 -3
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/combined_reducer.py +3 -2
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/correlation_reducer.py +1 -1
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/smart_correlation_reducer.py +6 -1
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/selector/selector.py +8 -2
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/trainer.py +4 -2
- {wavetrainer-0.0.43 → wavetrainer-0.0.44/wavetrainer.egg-info}/PKG-INFO +1 -1
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/LICENSE +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/MANIFEST.in +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/README.md +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/requirements.txt +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/setup.cfg +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/tests/__init__.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/tests/model/__init__.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/tests/model/catboost_kwargs_test.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/create.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/catboost/__init__.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_classifier_wrap.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_regressor_wrap.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/tabpfn/__init__.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/xgboost/__init__.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/xgboost/early_stopper.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/xgboost/xgboost_logger.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model_type.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/params.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/unseen_reducer.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/class_weights.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/combined_weights.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/exponential_weights.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/linear_weights.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/noop_weights.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/sigmoid_weights.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/weights/weights_router.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/windower/windower.py +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer.egg-info/SOURCES.txt +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer.egg-info/requires.txt +0 -0
- {wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
|
|
23
23
|
|
24
24
|
setup(
|
25
25
|
name='wavetrainer',
|
26
|
-
version='0.0.43',
|
26
|
+
version='0.0.44',
|
27
27
|
description='A library for automatically finding the optimal model within feature and hyperparameter space.',
|
28
28
|
long_description=long_description,
|
29
29
|
long_description_content_type='text/markdown',
|
@@ -40,7 +40,7 @@ class TestTrainer(unittest.TestCase):
|
|
40
40
|
|
41
41
|
def test_trainer_dt_column(self):
|
42
42
|
with tempfile.TemporaryDirectory() as tmpdir:
|
43
|
-
trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=
|
43
|
+
trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=5, dt_column="dt_column")
|
44
44
|
x_data = [i for i in range(100)]
|
45
45
|
x_index = [datetime.datetime(2022, 1, 1) + datetime.timedelta(days=i) for i in range(len(x_data))]
|
46
46
|
df = pd.DataFrame(
|
@@ -37,6 +37,9 @@ class CalibratorRouter(Calibrator):
|
|
37
37
|
return "router"
|
38
38
|
|
39
39
|
def predictions_as_x(self, y: pd.Series | pd.DataFrame | None = None) -> bool:
|
40
|
+
calibrator = self._calibrator
|
41
|
+
if calibrator is not None:
|
42
|
+
return calibrator.predictions_as_x(None)
|
40
43
|
if y is None:
|
41
44
|
raise ValueError("y is null")
|
42
45
|
if determine_model_type(y) == ModelType.REGRESSION:
|
@@ -21,7 +21,7 @@ class MAPIECalibrator(Calibrator):
|
|
21
21
|
|
22
22
|
def __init__(self, model: Model):
|
23
23
|
super().__init__(model)
|
24
|
-
self._mapie = MapieRegressor(model.create_estimator(), method="plus")
|
24
|
+
self._mapie = MapieRegressor(model.create_estimator(), method="plus", cv=5)
|
25
25
|
|
26
26
|
@classmethod
|
27
27
|
def name(cls) -> str:
|
@@ -54,19 +54,22 @@ class MAPIECalibrator(Calibrator):
|
|
54
54
|
raise ValueError("mapie is null")
|
55
55
|
if y is None:
|
56
56
|
raise ValueError("y is null")
|
57
|
-
|
58
|
-
return self
|
59
|
-
mapie.fit(df.to_numpy(), y.to_numpy())
|
57
|
+
mapie.fit(self._model.convert_df(df), y)
|
60
58
|
return self
|
61
59
|
|
62
60
|
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
63
61
|
alpha = []
|
64
62
|
for potential_alpha in [0.05, 0.32]:
|
65
|
-
if
|
63
|
+
if (
|
64
|
+
len(df) > int(1.0 / potential_alpha) + 1
|
65
|
+
and len(df) > int(1.0 / (1.0 - potential_alpha)) + 1
|
66
|
+
):
|
66
67
|
alpha.append(potential_alpha)
|
67
68
|
ret_df = pd.DataFrame(index=df.index)
|
68
69
|
if alpha:
|
69
|
-
_, y_pis = self._mapie.predict(
|
70
|
+
_, y_pis = self._mapie.predict(
|
71
|
+
self._model.convert_df(df), alpha=alpha, allow_infinite_bounds=True
|
72
|
+
)
|
70
73
|
for i in range(y_pis.shape[1]):
|
71
74
|
if i >= len(alpha):
|
72
75
|
continue
|
@@ -3,6 +3,7 @@
|
|
3
3
|
from typing import Any
|
4
4
|
|
5
5
|
import numpy as np
|
6
|
+
import pandas as pd
|
6
7
|
from catboost import Pool # type: ignore
|
7
8
|
|
8
9
|
ORIGINAL_X_ARG_KEY = "original_x"
|
@@ -12,12 +13,17 @@ CAT_FEATURES_ARG_KEY = "cat_features"
|
|
12
13
|
|
13
14
|
def handle_fit_kwargs(*args, **kwargs) -> tuple[tuple[Any, ...], dict[str, Any]]:
|
14
15
|
"""Handles keyword args coming into a catboost fit method."""
|
16
|
+
args_list = list(args)
|
17
|
+
fit_x = args_list[0]
|
18
|
+
|
19
|
+
cat_features = kwargs.get(CAT_FEATURES_ARG_KEY)
|
20
|
+
if cat_features is None and isinstance(fit_x, pd.DataFrame):
|
21
|
+
cat_features = fit_x.select_dtypes(include="category").columns.tolist()
|
22
|
+
kwargs[CAT_FEATURES_ARG_KEY] = cat_features
|
23
|
+
|
15
24
|
if ORIGINAL_X_ARG_KEY in kwargs:
|
16
25
|
df = kwargs[ORIGINAL_X_ARG_KEY]
|
17
26
|
eval_x, eval_y = kwargs[EVAL_SET_ARG_KEY]
|
18
|
-
cat_features = kwargs[CAT_FEATURES_ARG_KEY]
|
19
|
-
args_list = list(args)
|
20
|
-
fit_x = args_list[0]
|
21
27
|
fix_x_cp = fit_x.copy()
|
22
28
|
|
23
29
|
# Stupid code to ensure eval is feature equivalent to train data
|
@@ -32,9 +38,6 @@ def handle_fit_kwargs(*args, **kwargs) -> tuple[tuple[Any, ...], dict[str, Any]]
|
|
32
38
|
included_columns.append(col)
|
33
39
|
break
|
34
40
|
# We also need to update cat_features or catboost will yell at us
|
35
|
-
cat_features = list(
|
36
|
-
set(list(kwargs.get(CAT_FEATURES_ARG_KEY, []))) & set(included_columns)
|
37
|
-
)
|
38
41
|
args_list[0] = df[included_columns]
|
39
42
|
args = tuple(args_list)
|
40
43
|
|
@@ -45,7 +48,7 @@ def handle_fit_kwargs(*args, **kwargs) -> tuple[tuple[Any, ...], dict[str, Any]]
|
|
45
48
|
label=eval_y,
|
46
49
|
cat_features=cat_features,
|
47
50
|
)
|
48
|
-
kwargs[CAT_FEATURES_ARG_KEY] = cat_features
|
49
51
|
|
50
52
|
del kwargs[ORIGINAL_X_ARG_KEY]
|
53
|
+
|
51
54
|
return args, kwargs
|
@@ -2,12 +2,12 @@
|
|
2
2
|
|
3
3
|
# pylint: disable=line-too-long
|
4
4
|
import json
|
5
|
-
import logging
|
6
5
|
import os
|
7
6
|
from typing import Self
|
8
7
|
|
9
8
|
import optuna
|
10
9
|
import pandas as pd
|
10
|
+
import pytest_is_running
|
11
11
|
import torch
|
12
12
|
from catboost import CatBoost, Pool # type: ignore
|
13
13
|
|
@@ -83,12 +83,21 @@ class CatboostModel(Model):
|
|
83
83
|
def create_estimator(self):
|
84
84
|
return self._create_catboost()
|
85
85
|
|
86
|
+
def reset(self):
|
87
|
+
self._catboost = None
|
88
|
+
self._best_iteration = None
|
89
|
+
|
90
|
+
def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
|
91
|
+
return df
|
92
|
+
|
86
93
|
def set_options(
|
87
94
|
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
88
95
|
) -> None:
|
89
96
|
self._iterations = trial.suggest_int(_ITERATIONS_KEY, 100, 10000)
|
90
97
|
self._learning_rate = trial.suggest_float(_LEARNING_RATE_KEY, 0.001, 0.3)
|
91
|
-
self._depth = trial.suggest_int(
|
98
|
+
self._depth = trial.suggest_int(
|
99
|
+
_DEPTH_KEY, 1, 2 if pytest_is_running.is_running() else 6
|
100
|
+
)
|
92
101
|
self._l2_leaf_reg = trial.suggest_float(_L2_LEAF_REG_KEY, 3.0, 50.0)
|
93
102
|
self._boosting_type = trial.suggest_categorical(
|
94
103
|
_BOOSTING_TYPE_KEY, ["Ordered", "Plain"]
|
@@ -170,8 +179,6 @@ class CatboostModel(Model):
|
|
170
179
|
metric_period=100,
|
171
180
|
eval_set=eval_pool,
|
172
181
|
)
|
173
|
-
importances = catboost.get_feature_importance(prettified=True)
|
174
|
-
logging.info("Importances:\n%s", importances)
|
175
182
|
self._best_iteration = catboost.get_best_iteration()
|
176
183
|
return self
|
177
184
|
|
@@ -46,3 +46,11 @@ class Model(Params, Fit):
|
|
46
46
|
def create_estimator(self) -> Any:
|
47
47
|
"""Creates a new estimator."""
|
48
48
|
raise NotImplementedError("creates_estimator not implemented in parent class.")
|
49
|
+
|
50
|
+
def reset(self) -> None:
|
51
|
+
"""Resets a model."""
|
52
|
+
raise NotImplementedError("reset not implemented in parent class.")
|
53
|
+
|
54
|
+
def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
|
55
|
+
"""Converts a dataframe for use with a model."""
|
56
|
+
raise NotImplementedError("convert_df not implemented in parent class.")
|
@@ -66,6 +66,18 @@ class ModelRouter(Model):
|
|
66
66
|
raise ValueError("model is null")
|
67
67
|
return model.create_estimator()
|
68
68
|
|
69
|
+
def reset(self):
|
70
|
+
model = self._model
|
71
|
+
if model is None:
|
72
|
+
raise ValueError("model is null")
|
73
|
+
model.reset()
|
74
|
+
|
75
|
+
def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
|
76
|
+
model = self._model
|
77
|
+
if model is None:
|
78
|
+
raise ValueError("model is null")
|
79
|
+
return model.convert_df(df)
|
80
|
+
|
69
81
|
def set_options(
|
70
82
|
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
71
83
|
) -> None:
|
@@ -56,6 +56,12 @@ class TabPFNModel(Model):
|
|
56
56
|
def create_estimator(self):
|
57
57
|
return self._create_tabpfn()
|
58
58
|
|
59
|
+
def reset(self):
|
60
|
+
pass
|
61
|
+
|
62
|
+
def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
|
63
|
+
return df
|
64
|
+
|
59
65
|
def set_options(
|
60
66
|
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
61
67
|
) -> None:
|
@@ -7,6 +7,7 @@ from typing import Self
|
|
7
7
|
|
8
8
|
import optuna
|
9
9
|
import pandas as pd
|
10
|
+
import pytest_is_running
|
10
11
|
import torch
|
11
12
|
from xgboost import XGBClassifier, XGBRegressor
|
12
13
|
from xgboost.callback import TrainingCallback
|
@@ -119,6 +120,13 @@ class XGBoostModel(Model):
|
|
119
120
|
def create_estimator(self):
|
120
121
|
return self._create_xgboost()
|
121
122
|
|
123
|
+
def reset(self):
|
124
|
+
self._xgboost = None
|
125
|
+
self._best_iteration = None
|
126
|
+
|
127
|
+
def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
|
128
|
+
return _convert_categoricals(df)
|
129
|
+
|
122
130
|
def set_options(
|
123
131
|
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
124
132
|
) -> None:
|
@@ -130,7 +138,9 @@ class XGBoostModel(Model):
|
|
130
138
|
self._subsample = trial.suggest_float(_SUBSAMPLE_KEY, 0.2, 1.0)
|
131
139
|
self._colsample_bytree = trial.suggest_float(_COLSAMPLE_BYTREE_KEY, 0.2, 1.0)
|
132
140
|
if self._booster in ["gbtree", "dart"]:
|
133
|
-
self._max_depth = trial.suggest_int(
|
141
|
+
self._max_depth = trial.suggest_int(
|
142
|
+
_MAX_DEPTH_KEY, 3, 4 if pytest_is_running.is_running() else 9
|
143
|
+
)
|
134
144
|
self._min_child_weight = trial.suggest_int(
|
135
145
|
_MIN_CHILD_WEIGHT_KEY, 2, 10, log=True
|
136
146
|
)
|
@@ -148,7 +158,9 @@ class XGBoostModel(Model):
|
|
148
158
|
)
|
149
159
|
self._rate_drop = trial.suggest_float(_RATE_DROP_KEY, 1e-8, 1.0, log=True)
|
150
160
|
self._skip_drop = trial.suggest_float(_SKIP_DROP_KEY, 1e-8, 1.0, log=True)
|
151
|
-
self._num_boost_rounds = trial.suggest_int(
|
161
|
+
self._num_boost_rounds = trial.suggest_int(
|
162
|
+
_NUM_BOOST_ROUNDS_KEY, 100, 110 if pytest_is_running.is_running() else 10000
|
163
|
+
)
|
152
164
|
self._early_stopping_rounds = trial.suggest_int(
|
153
165
|
_EARLY_STOPPING_ROUNDS_KEY, 50, 500
|
154
166
|
)
|
@@ -279,7 +291,8 @@ class XGBoostModel(Model):
|
|
279
291
|
)
|
280
292
|
param = {
|
281
293
|
"objective": "binary:logistic",
|
282
|
-
"tree_method": "
|
294
|
+
"tree_method": "hist" if torch.cuda.is_available() else "exact",
|
295
|
+
"device": "cuda" if torch.cuda.is_available() else "cpu",
|
283
296
|
# defines booster, gblinear for linear functions.
|
284
297
|
"booster": self._booster,
|
285
298
|
# L2 regularization weight.
|
@@ -7,7 +7,6 @@ import joblib # type: ignore
|
|
7
7
|
import optuna
|
8
8
|
import pandas as pd
|
9
9
|
from feature_engine.selection.base_selector import BaseSelector
|
10
|
-
from sklearn.utils.validation import check_is_fitted # type: ignore
|
11
10
|
|
12
11
|
from .reducer import Reducer
|
13
12
|
|
@@ -60,6 +59,4 @@ class BaseSelectorReducer(Reducer):
|
|
60
59
|
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
61
60
|
if len(df.columns) <= 1:
|
62
61
|
return df
|
63
|
-
if not check_is_fitted(self._base_selector):
|
64
|
-
return df
|
65
62
|
return self._base_selector.transform(df)
|
@@ -1,5 +1,6 @@
|
|
1
1
|
"""A reducer that combines all the other reducers."""
|
2
2
|
|
3
|
+
# pylint: disable=line-too-long
|
3
4
|
import json
|
4
5
|
import logging
|
5
6
|
import os
|
@@ -107,8 +108,8 @@ class CombinedReducer(Reducer):
|
|
107
108
|
removed_columns = before_columns.difference(after_columns)
|
108
109
|
if removed_columns:
|
109
110
|
removed_columns_dict[reducer.name()] = list(removed_columns)
|
110
|
-
|
111
|
-
|
111
|
+
print(
|
112
|
+
f"{reducer.name()} reducer took {time.time() - start_reducer} and removed {len(removed_columns)} features",
|
112
113
|
)
|
113
114
|
if self._folder is not None:
|
114
115
|
with open(
|
@@ -37,7 +37,7 @@ class CorrelationReducer(BaseSelectorReducer):
|
|
37
37
|
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
38
38
|
) -> None:
|
39
39
|
self._correlation_selector.threshold = trial.suggest_float(
|
40
|
-
_CORRELATION_REDUCER_THRESHOLD, 0.
|
40
|
+
_CORRELATION_REDUCER_THRESHOLD, 0.7, 0.99
|
41
41
|
)
|
42
42
|
|
43
43
|
def fit(
|
@@ -33,7 +33,7 @@ class SmartCorrelationReducer(BaseSelectorReducer):
|
|
33
33
|
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
34
34
|
) -> None:
|
35
35
|
self._correlation_selector.threshold = trial.suggest_float(
|
36
|
-
_SMART_CORRELATION_REDUCER_THRESHOLD, 0.
|
36
|
+
_SMART_CORRELATION_REDUCER_THRESHOLD, 0.7, 0.99
|
37
37
|
)
|
38
38
|
|
39
39
|
def fit(
|
@@ -48,3 +48,8 @@ class SmartCorrelationReducer(BaseSelectorReducer):
|
|
48
48
|
if len(self._correlation_selector.variables) <= 1:
|
49
49
|
return self
|
50
50
|
return super().fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y)
|
51
|
+
|
52
|
+
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
53
|
+
if len(find_non_categorical_numeric_columns(df)) <= 1:
|
54
|
+
return df
|
55
|
+
return super().transform(df)
|
@@ -57,7 +57,8 @@ class Selector(Params, Fit):
|
|
57
57
|
return self
|
58
58
|
if not isinstance(y, pd.Series):
|
59
59
|
raise ValueError("y is not a series.")
|
60
|
-
|
60
|
+
total_columns = len(df.columns)
|
61
|
+
if total_columns <= 1:
|
61
62
|
return self
|
62
63
|
print(
|
63
64
|
f"Performing feature selection with {self._steps} steps and a total ratio of {self._feature_ratio}"
|
@@ -81,6 +82,9 @@ class Selector(Params, Fit):
|
|
81
82
|
if not current_features:
|
82
83
|
current_features = [list(feature_importances.keys())[0]]
|
83
84
|
current_features = current_features[:required_features]
|
85
|
+
print(
|
86
|
+
f"Current Features:\n{pd.Series(data=list(feature_importances.values()), index=list(feature_importances.keys()))}\n"
|
87
|
+
)
|
84
88
|
|
85
89
|
n_features = len(current_features)
|
86
90
|
for i in range(self._steps):
|
@@ -90,16 +94,18 @@ class Selector(Params, Fit):
|
|
90
94
|
ratio_diff = 1.0 - self._feature_ratio
|
91
95
|
ratio_step = ratio_diff / float(self._steps)
|
92
96
|
current_ratio = 1.0 - (ratio_step * i)
|
93
|
-
n_features = max(1, int(
|
97
|
+
n_features = max(1, int(total_columns * current_ratio))
|
94
98
|
if n_features >= len(current_features):
|
95
99
|
continue
|
96
100
|
|
101
|
+
self._model.reset()
|
97
102
|
self._model.fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y)
|
98
103
|
set_current_features(n_features)
|
99
104
|
print(f"Reduced features to {len(current_features)}")
|
100
105
|
df = df[current_features]
|
101
106
|
if eval_x is not None:
|
102
107
|
eval_x = eval_x[current_features]
|
108
|
+
print(f"Final feature count: {len(current_features)}")
|
103
109
|
|
104
110
|
self._selector = current_features
|
105
111
|
|
@@ -212,7 +212,7 @@ class Trainer(Fit):
|
|
212
212
|
folder = os.path.join(
|
213
213
|
self._folder, str(y_series.name), split_idx.isoformat()
|
214
214
|
)
|
215
|
-
new_folder = os.path.exists(folder)
|
215
|
+
new_folder = not os.path.exists(folder)
|
216
216
|
os.makedirs(folder, exist_ok=True)
|
217
217
|
trial_file = os.path.join(folder, _TRIAL_FILENAME)
|
218
218
|
if os.path.exists(trial_file):
|
@@ -517,7 +517,9 @@ class Trainer(Fit):
|
|
517
517
|
x_pred = reducer.transform(group[feature_columns])
|
518
518
|
x_pred = selector.transform(x_pred)
|
519
519
|
y_pred = model.transform(x_pred)
|
520
|
-
y_pred = calibrator.transform(
|
520
|
+
y_pred = calibrator.transform(
|
521
|
+
y_pred if calibrator.predictions_as_x(None) else x_pred
|
522
|
+
)
|
521
523
|
for new_column in y_pred.columns.values:
|
522
524
|
group["_".join([column, new_column])] = y_pred[new_column]
|
523
525
|
return group
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_classifier_wrap.py
RENAMED
File without changes
|
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/model/catboost/catboost_regressor_wrap.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.0.43 → wavetrainer-0.0.44}/wavetrainer/reducer/non_categorical_numeric_columns.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|