wavetrainer 0.1.10__tar.gz → 0.1.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.1.10/wavetrainer.egg-info → wavetrainer-0.1.11}/PKG-INFO +3 -1
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/README.md +1 -0
- wavetrainer-0.1.10/wavetrainer.egg-info/requires.txt → wavetrainer-0.1.11/requirements.txt +1 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/setup.py +1 -1
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/__init__.py +1 -1
- wavetrainer-0.1.11/wavetrainer/model/lightgbm/__init__.py +1 -0
- wavetrainer-0.1.11/wavetrainer/model/lightgbm/lightgbm_model.py +245 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/model_router.py +2 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/xgboost/xgboost_model.py +1 -1
- {wavetrainer-0.1.10 → wavetrainer-0.1.11/wavetrainer.egg-info}/PKG-INFO +3 -1
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer.egg-info/SOURCES.txt +2 -0
- wavetrainer-0.1.10/requirements.txt → wavetrainer-0.1.11/wavetrainer.egg-info/requires.txt +2 -1
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/LICENSE +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/MANIFEST.in +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/setup.cfg +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/tests/__init__.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/tests/model/__init__.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/tests/model/catboost_kwargs_test.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/tests/trainer_test.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/calibrator/calibrator_router.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/create.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/catboost/__init__.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/catboost/catboost_classifier_wrap.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/catboost/catboost_kwargs.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/catboost/catboost_model.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/catboost/catboost_regressor_wrap.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/model.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/tabpfn/__init__.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/tabpfn/tabpfn_model.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/xgboost/__init__.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/xgboost/early_stopper.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/xgboost/xgboost_logger.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model_type.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/params.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/reducer/base_selector_reducer.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/reducer/combined_reducer.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/reducer/correlation_reducer.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/reducer/pca_reducer.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/reducer/unseen_reducer.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/selector/selector.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/trainer.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/weights/class_weights.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/weights/combined_weights.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/weights/exponential_weights.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/weights/linear_weights.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/weights/noop_weights.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/weights/sigmoid_weights.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/weights/weights_router.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/windower/windower.py +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: wavetrainer
|
3
|
-
Version: 0.1.10
|
3
|
+
Version: 0.1.11
|
4
4
|
Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
|
5
5
|
Home-page: https://github.com/8W9aG/wavetrainer
|
6
6
|
Author: Will Sackfield
|
@@ -29,6 +29,7 @@ Requires-Dist: jax>=0.6.1
|
|
29
29
|
Requires-Dist: tabpfn_extensions>=0.0.4
|
30
30
|
Requires-Dist: hyperopt>=0.2.7
|
31
31
|
Requires-Dist: pycaleva>=0.8.2
|
32
|
+
Requires-Dist: lightgbm>=4.6.0
|
32
33
|
|
33
34
|
# wavetrainer
|
34
35
|
|
@@ -64,6 +65,7 @@ Python 3.11.6:
|
|
64
65
|
- [tabpfn_extensions](https://github.com/PriorLabs/tabpfn-extensions)
|
65
66
|
- [hyperopt](https://github.com/hyperopt/hyperopt)
|
66
67
|
- [pycaleva](https://github.com/MartinWeigl/pycaleva)
|
68
|
+
- [lightgbm](https://github.com/microsoft/LightGBM)
|
67
69
|
|
68
70
|
## Raison D'être :thought_balloon:
|
69
71
|
|
@@ -32,6 +32,7 @@ Python 3.11.6:
|
|
32
32
|
- [tabpfn_extensions](https://github.com/PriorLabs/tabpfn-extensions)
|
33
33
|
- [hyperopt](https://github.com/hyperopt/hyperopt)
|
34
34
|
- [pycaleva](https://github.com/MartinWeigl/pycaleva)
|
35
|
+
- [lightgbm](https://github.com/microsoft/LightGBM)
|
35
36
|
|
36
37
|
## Raison D'être :thought_balloon:
|
37
38
|
|
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
|
|
23
23
|
|
24
24
|
setup(
|
25
25
|
name='wavetrainer',
|
26
|
-
version='0.1.10',
|
26
|
+
version='0.1.11',
|
27
27
|
description='A library for automatically finding the optimal model within feature and hyperparameter space.',
|
28
28
|
long_description=long_description,
|
29
29
|
long_description_content_type='text/markdown',
|
@@ -0,0 +1 @@
|
|
1
|
+
"""The wavetrain lightgbm model module."""
|
@@ -0,0 +1,245 @@
|
|
1
|
+
"""A model that wraps lightgbm."""
|
2
|
+
|
3
|
+
# pylint: disable=duplicate-code,too-many-arguments,too-many-positional-arguments,too-many-instance-attributes
|
4
|
+
import json
|
5
|
+
import os
|
6
|
+
from typing import Self
|
7
|
+
|
8
|
+
import joblib # type: ignore
|
9
|
+
import lightgbm as lgb
|
10
|
+
import optuna
|
11
|
+
import pandas as pd
|
12
|
+
import torch
|
13
|
+
|
14
|
+
from ...exceptions import WavetrainException
|
15
|
+
from ...model_type import ModelType, determine_model_type
|
16
|
+
from ..model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
|
17
|
+
|
18
|
+
_BOOSTING_TYPE_KEY = "gbm_boosting_type"
|
19
|
+
_NUM_LEAVES_KEY = "gbm_num_leaves"
|
20
|
+
_MIN_CHILD_SAMPLES_KEY = "gbm_min_child_samples"
|
21
|
+
_MODEL_PARAMS_FILENAME = "model_params.json"
|
22
|
+
_MODEL_FILENAME = "model.pkl"
|
23
|
+
_BEST_ITERATION_KEY = "best_iteration"
|
24
|
+
_EARLY_STOPPING_ROUNDS_KEY = "gbm_early_stopping_rounds"
|
25
|
+
_ITERATIONS_KEY = "gbm_iterations"
|
26
|
+
|
27
|
+
|
28
|
+
class LightGBMModel(Model):
|
29
|
+
"""A class that uses lightgbm as a model."""
|
30
|
+
|
31
|
+
_gbm: lgb.LGBMModel | None
|
32
|
+
_boosting_type: str | None
|
33
|
+
_num_leaves: int | None
|
34
|
+
_min_child_samples: int | None
|
35
|
+
_model_type: None | ModelType
|
36
|
+
_best_iteration: None | int
|
37
|
+
_early_stopping_rounds: None | int
|
38
|
+
_iterations: None | int
|
39
|
+
|
40
|
+
@classmethod
|
41
|
+
def name(cls) -> str:
|
42
|
+
return "lightgbm"
|
43
|
+
|
44
|
+
@classmethod
|
45
|
+
def supports_x(cls, df: pd.DataFrame) -> bool:
|
46
|
+
return True
|
47
|
+
|
48
|
+
def __init__(self) -> None:
|
49
|
+
super().__init__()
|
50
|
+
self._gbm = None
|
51
|
+
self._boosting_type = None
|
52
|
+
self._num_leaves = None
|
53
|
+
self._min_child_samples = None
|
54
|
+
self._model_type = None
|
55
|
+
self._best_iteration = None
|
56
|
+
self._early_stopping_rounds = None
|
57
|
+
self._iterations = None
|
58
|
+
|
59
|
+
@property
|
60
|
+
def supports_importances(self) -> bool:
|
61
|
+
return True
|
62
|
+
|
63
|
+
@property
|
64
|
+
def feature_importances(self) -> dict[str, float]:
|
65
|
+
gbm = self._provide_gbm()
|
66
|
+
importances = gbm.feature_importances_
|
67
|
+
names = gbm.feature_name_
|
68
|
+
total_importances = sum(importances)
|
69
|
+
return {
|
70
|
+
names[count]: importance / total_importances
|
71
|
+
for count, importance in enumerate(importances)
|
72
|
+
}
|
73
|
+
|
74
|
+
def provide_estimator(self):
|
75
|
+
return self._provide_gbm()
|
76
|
+
|
77
|
+
def create_estimator(self):
|
78
|
+
return self._create_gbm()
|
79
|
+
|
80
|
+
def reset(self):
|
81
|
+
self._gbm = None
|
82
|
+
self._best_iteration = None
|
83
|
+
|
84
|
+
def convert_df(self, df: pd.DataFrame) -> pd.DataFrame:
|
85
|
+
return df
|
86
|
+
|
87
|
+
def set_options(
|
88
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
89
|
+
) -> None:
|
90
|
+
self._boosting_type = trial.suggest_categorical(
|
91
|
+
_BOOSTING_TYPE_KEY, ["gbdt", "dart", "rf"]
|
92
|
+
)
|
93
|
+
self._num_leaves = trial.suggest_int(_NUM_LEAVES_KEY, 2, 256)
|
94
|
+
self._min_child_samples = trial.suggest_int(_MIN_CHILD_SAMPLES_KEY, 5, 100)
|
95
|
+
self._best_iteration = trial.user_attrs.get(_BEST_ITERATION_KEY)
|
96
|
+
self._early_stopping_rounds = trial.suggest_int(
|
97
|
+
_EARLY_STOPPING_ROUNDS_KEY, 10, 500
|
98
|
+
)
|
99
|
+
self._iterations = trial.suggest_int(_ITERATIONS_KEY, 100, 10000)
|
100
|
+
|
101
|
+
def load(self, folder: str) -> None:
|
102
|
+
with open(
|
103
|
+
os.path.join(folder, _MODEL_PARAMS_FILENAME), encoding="utf8"
|
104
|
+
) as handle:
|
105
|
+
params = json.load(handle)
|
106
|
+
self._boosting_type = params[_BOOSTING_TYPE_KEY]
|
107
|
+
self._num_leaves = params[_NUM_LEAVES_KEY]
|
108
|
+
self._min_child_samples = params[_MIN_CHILD_SAMPLES_KEY]
|
109
|
+
self._best_iteration = params.get(_BEST_ITERATION_KEY)
|
110
|
+
self._early_stopping_rounds = params[_EARLY_STOPPING_ROUNDS_KEY]
|
111
|
+
self._iterations = params[_ITERATIONS_KEY]
|
112
|
+
self._gbm = joblib.load(os.path.join(folder, _MODEL_FILENAME))
|
113
|
+
|
114
|
+
def save(self, folder: str, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
115
|
+
with open(
|
116
|
+
os.path.join(folder, _MODEL_PARAMS_FILENAME), "w", encoding="utf8"
|
117
|
+
) as handle:
|
118
|
+
json.dump(
|
119
|
+
{
|
120
|
+
_BOOSTING_TYPE_KEY: self._boosting_type,
|
121
|
+
_NUM_LEAVES_KEY: self._num_leaves,
|
122
|
+
_MIN_CHILD_SAMPLES_KEY: self._min_child_samples,
|
123
|
+
_BEST_ITERATION_KEY: self._best_iteration,
|
124
|
+
_EARLY_STOPPING_ROUNDS_KEY: self._early_stopping_rounds,
|
125
|
+
_ITERATIONS_KEY: self._iterations,
|
126
|
+
},
|
127
|
+
handle,
|
128
|
+
)
|
129
|
+
gbm = self._provide_gbm()
|
130
|
+
joblib.dump(gbm, os.path.join(folder, _MODEL_FILENAME))
|
131
|
+
trial.set_user_attr(_BEST_ITERATION_KEY, self._best_iteration)
|
132
|
+
|
133
|
+
def fit(
|
134
|
+
self,
|
135
|
+
df: pd.DataFrame,
|
136
|
+
y: pd.Series | pd.DataFrame | None = None,
|
137
|
+
w: pd.Series | None = None,
|
138
|
+
eval_x: pd.DataFrame | None = None,
|
139
|
+
eval_y: pd.Series | pd.DataFrame | None = None,
|
140
|
+
) -> Self:
|
141
|
+
if y is None:
|
142
|
+
raise ValueError("y is null.")
|
143
|
+
self._model_type = determine_model_type(y)
|
144
|
+
gbm = self._provide_gbm()
|
145
|
+
early_stopping_rounds = self._early_stopping_rounds
|
146
|
+
if early_stopping_rounds is None:
|
147
|
+
raise ValueError("early_stopping_rounds is null")
|
148
|
+
|
149
|
+
eval_set = None
|
150
|
+
callbacks = []
|
151
|
+
if eval_x is None or eval_y is None:
|
152
|
+
eval_set = [(eval_x, eval_y.to_numpy().flatten())] # type: ignore
|
153
|
+
callbacks = [
|
154
|
+
lgb.early_stopping(stopping_rounds=early_stopping_rounds),
|
155
|
+
]
|
156
|
+
if self._best_iteration is not None:
|
157
|
+
eval_set = None
|
158
|
+
callbacks = []
|
159
|
+
try:
|
160
|
+
gbm.fit(
|
161
|
+
X=df,
|
162
|
+
y=y.to_numpy().flatten(),
|
163
|
+
sample_weight=w,
|
164
|
+
eval_set=eval_set, # type: ignore
|
165
|
+
callbacks=callbacks, # type: ignore
|
166
|
+
)
|
167
|
+
except lgb.basic.LightGBMError as exc:
|
168
|
+
raise WavetrainException() from exc
|
169
|
+
self._best_iteration = gbm.best_iteration_
|
170
|
+
return self
|
171
|
+
|
172
|
+
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
173
|
+
gbm = self._provide_gbm()
|
174
|
+
pred = gbm.predict(df)
|
175
|
+
pred_df = pd.DataFrame(
|
176
|
+
index=df.index,
|
177
|
+
data={
|
178
|
+
PREDICTION_COLUMN: pred.flatten(), # type: ignore
|
179
|
+
},
|
180
|
+
)
|
181
|
+
if self._model_type != ModelType.REGRESSION:
|
182
|
+
proba = gbm.predict_proba(df) # type: ignore
|
183
|
+
for i in range(proba.shape[1]):
|
184
|
+
pred_df[f"{PROBABILITY_COLUMN_PREFIX}{i}"] = proba[:, i]
|
185
|
+
return pred_df
|
186
|
+
|
187
|
+
def _provide_gbm(self) -> lgb.LGBMModel:
|
188
|
+
gbm = self._gbm
|
189
|
+
if gbm is None:
|
190
|
+
gbm = self._create_gbm()
|
191
|
+
self._gbm = gbm
|
192
|
+
if gbm is None:
|
193
|
+
raise ValueError("gbm is null")
|
194
|
+
return gbm
|
195
|
+
|
196
|
+
def _create_gbm(self) -> lgb.LGBMModel:
|
197
|
+
best_iteration = self._best_iteration
|
198
|
+
iterations = best_iteration if best_iteration is not None else self._iterations
|
199
|
+
boosting_type = self._boosting_type
|
200
|
+
if boosting_type is None:
|
201
|
+
raise ValueError("boosting_type is null")
|
202
|
+
num_leaves = self._num_leaves
|
203
|
+
if num_leaves is None:
|
204
|
+
raise ValueError("num_leaves is null")
|
205
|
+
min_child_samples = self._min_child_samples
|
206
|
+
if min_child_samples is None:
|
207
|
+
raise ValueError("min_child_samples is null")
|
208
|
+
|
209
|
+
match self._model_type:
|
210
|
+
case ModelType.BINARY:
|
211
|
+
return lgb.LGBMClassifier(
|
212
|
+
boosting_type=boosting_type,
|
213
|
+
num_leaves=num_leaves,
|
214
|
+
objective="binary",
|
215
|
+
min_child_samples=min_child_samples,
|
216
|
+
num_iterations=iterations,
|
217
|
+
device="gpu" if torch.cuda.is_available() else None,
|
218
|
+
)
|
219
|
+
case ModelType.REGRESSION:
|
220
|
+
return lgb.LGBMRegressor(
|
221
|
+
boosting_type=boosting_type,
|
222
|
+
num_leaves=num_leaves,
|
223
|
+
min_child_samples=min_child_samples,
|
224
|
+
num_iterations=iterations,
|
225
|
+
device="gpu" if torch.cuda.is_available() else None,
|
226
|
+
)
|
227
|
+
case ModelType.BINNED_BINARY:
|
228
|
+
return lgb.LGBMClassifier(
|
229
|
+
boosting_type=boosting_type,
|
230
|
+
num_leaves=num_leaves,
|
231
|
+
objective="binary",
|
232
|
+
min_child_samples=min_child_samples,
|
233
|
+
num_iterations=iterations,
|
234
|
+
device="gpu" if torch.cuda.is_available() else None,
|
235
|
+
)
|
236
|
+
case ModelType.MULTI_CLASSIFICATION:
|
237
|
+
return lgb.LGBMClassifier(
|
238
|
+
boosting_type=boosting_type,
|
239
|
+
num_leaves=num_leaves,
|
240
|
+
min_child_samples=min_child_samples,
|
241
|
+
num_iterations=iterations,
|
242
|
+
device="gpu" if torch.cuda.is_available() else None,
|
243
|
+
)
|
244
|
+
case _:
|
245
|
+
raise ValueError(f"Unrecognised model type: {self._model_type}")
|
@@ -11,6 +11,7 @@ from sklearn.metrics import accuracy_score # type: ignore
|
|
11
11
|
|
12
12
|
from ..model_type import ModelType, determine_model_type
|
13
13
|
from .catboost.catboost_model import CatboostModel
|
14
|
+
from .lightgbm.lightgbm_model import LightGBMModel
|
14
15
|
from .model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
|
15
16
|
from .tabpfn.tabpfn_model import TabPFNModel
|
16
17
|
from .xgboost.xgboost_model import XGBoostModel
|
@@ -22,6 +23,7 @@ _MODELS = {
|
|
22
23
|
CatboostModel.name(): CatboostModel,
|
23
24
|
TabPFNModel.name(): TabPFNModel,
|
24
25
|
XGBoostModel.name(): XGBoostModel,
|
26
|
+
LightGBMModel.name(): LightGBMModel,
|
25
27
|
}
|
26
28
|
|
27
29
|
|
@@ -53,7 +53,7 @@ def _convert_categoricals(input_df: pd.DataFrame) -> pd.DataFrame:
|
|
53
53
|
output_df = input_df.copy()
|
54
54
|
for col in input_df.select_dtypes(include=["category"]).columns:
|
55
55
|
output_df[col] = output_df[col].cat.codes
|
56
|
-
return output_df
|
56
|
+
return output_df.replace([np.inf, -np.inf], np.nan)
|
57
57
|
|
58
58
|
|
59
59
|
class XGBoostModel(Model):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: wavetrainer
|
3
|
-
Version: 0.1.10
|
3
|
+
Version: 0.1.11
|
4
4
|
Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
|
5
5
|
Home-page: https://github.com/8W9aG/wavetrainer
|
6
6
|
Author: Will Sackfield
|
@@ -29,6 +29,7 @@ Requires-Dist: jax>=0.6.1
|
|
29
29
|
Requires-Dist: tabpfn_extensions>=0.0.4
|
30
30
|
Requires-Dist: hyperopt>=0.2.7
|
31
31
|
Requires-Dist: pycaleva>=0.8.2
|
32
|
+
Requires-Dist: lightgbm>=4.6.0
|
32
33
|
|
33
34
|
# wavetrainer
|
34
35
|
|
@@ -64,6 +65,7 @@ Python 3.11.6:
|
|
64
65
|
- [tabpfn_extensions](https://github.com/PriorLabs/tabpfn-extensions)
|
65
66
|
- [hyperopt](https://github.com/hyperopt/hyperopt)
|
66
67
|
- [pycaleva](https://github.com/MartinWeigl/pycaleva)
|
68
|
+
- [lightgbm](https://github.com/microsoft/LightGBM)
|
67
69
|
|
68
70
|
## Raison D'être :thought_balloon:
|
69
71
|
|
@@ -32,6 +32,8 @@ wavetrainer/model/catboost/catboost_classifier_wrap.py
|
|
32
32
|
wavetrainer/model/catboost/catboost_kwargs.py
|
33
33
|
wavetrainer/model/catboost/catboost_model.py
|
34
34
|
wavetrainer/model/catboost/catboost_regressor_wrap.py
|
35
|
+
wavetrainer/model/lightgbm/__init__.py
|
36
|
+
wavetrainer/model/lightgbm/lightgbm_model.py
|
35
37
|
wavetrainer/model/tabpfn/__init__.py
|
36
38
|
wavetrainer/model/tabpfn/tabpfn_model.py
|
37
39
|
wavetrainer/model/xgboost/__init__.py
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/catboost/catboost_classifier_wrap.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/model/catboost/catboost_regressor_wrap.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.1.10 → wavetrainer-0.1.11}/wavetrainer/reducer/non_categorical_numeric_columns.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|