unifiedbooster 0.3.0__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/PKG-INFO +2 -1
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/setup.py +1 -2
- unifiedbooster-0.4.2/unifiedbooster/__init__.py +12 -0
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/unifiedbooster/gbdt.py +4 -1
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/unifiedbooster/gbdt_classification.py +12 -3
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/unifiedbooster/gbdt_regression.py +12 -3
- unifiedbooster-0.4.2/unifiedbooster/gpoptimization.py +422 -0
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/unifiedbooster.egg-info/PKG-INFO +2 -1
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/unifiedbooster.egg-info/SOURCES.txt +1 -0
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/unifiedbooster.egg-info/requires.txt +1 -0
- unifiedbooster-0.3.0/unifiedbooster/__init__.py +0 -5
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/LICENSE +0 -0
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/README.md +0 -0
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/setup.cfg +0 -0
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/unifiedbooster.egg-info/dependency_links.txt +0 -0
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/unifiedbooster.egg-info/entry_points.txt +0 -0
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/unifiedbooster.egg-info/not-zip-safe +0 -0
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/unifiedbooster.egg-info/top_level.txt +0 -0
--- unifiedbooster-0.3.0/PKG-INFO
+++ unifiedbooster-0.4.2/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unifiedbooster
-Version: 0.3.0
+Version: 0.4.2
 Summary: Unified interface for Gradient Boosted Decision Trees
 Home-page: https://github.com/thierrymoudiki/unifiedbooster
 Author: T. Moudiki
@@ -24,5 +24,6 @@ Requires-Dist: xgboost
 Requires-Dist: lightgbm
 Requires-Dist: catboost
 Requires-Dist: GPopt
+Requires-Dist: nnetsauce

 Unified interface for Gradient Boosted Decision Trees
--- unifiedbooster-0.3.0/setup.py
+++ unifiedbooster-0.4.2/setup.py
@@ -2,7 +2,6 @@

 """The setup script."""

-import platform
 import subprocess
 from setuptools import setup, find_packages
 from codecs import open
@@ -10,7 +9,7 @@ from os import path

 subprocess.check_call(['pip', 'install', 'Cython'])

-__version__ = "0.3.0"
+__version__ = "0.4.2"

 here = path.abspath(path.dirname(__file__))

--- /dev/null
+++ unifiedbooster-0.4.2/unifiedbooster/__init__.py
@@ -0,0 +1,12 @@
+from .gbdt import GBDT
+from .gbdt_classification import GBDTClassifier
+from .gbdt_regression import GBDTRegressor
+from .gpoptimization import cross_val_optim, lazy_cross_val_optim
+
+__all__ = [
+    "GBDT",
+    "GBDTClassifier",
+    "GBDTRegressor",
+    "cross_val_optim",
+    "lazy_cross_val_optim",
+]
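For orientation, the new top-level API re-exported here can be exercised as follows. A minimal sketch, not taken from the package's docs; `model_type="gradientboosting"` uses the scikit-learn backend so none of the optional boosters are required:

```python
# Hedged usage sketch of the 0.4.2 public API (names taken from the
# __init__.py above; the dataset and split are illustrative only).
import unifiedbooster as ub
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

clf = ub.GBDTClassifier(model_type="gradientboosting")
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))  # accuracy, via sklearn's ClassifierMixin
```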
--- unifiedbooster-0.3.0/unifiedbooster/gbdt.py
+++ unifiedbooster-0.4.2/unifiedbooster/gbdt.py
@@ -17,6 +17,9 @@ class GBDT(BaseEstimator):
     learning_rate: float
         shrinkage rate; used for reducing the gradient step

+    max_depth: int
+        maximum tree depth
+
     rowsample: float
         subsample ratio of the training instances

@@ -87,7 +90,7 @@ class GBDT(BaseEstimator):
                 "depth": self.max_depth,
                 "verbose": self.verbose,
                 "random_seed": self.seed,
-                "bootstrap_type": "
+                "bootstrap_type": "MVS",
                 **kwargs,
             }
         elif self.model_type == "gradientboosting":
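The CatBoost branch now fixes `bootstrap_type` to `"MVS"` (Minimal Variance Sampling); the previous value did not survive extraction above. A hedged sketch of what the resulting parameter mapping amounts to when handed to CatBoost directly, with illustrative values rather than the package's defaults:

```python
# Sketch only: this dict mirrors the kwargs GBDT appears to assemble for the
# CatBoost backend in the hunk above; concrete values here are illustrative.
from catboost import CatBoostClassifier

params = {
    "iterations": 100,        # n_estimators
    "learning_rate": 0.1,
    "depth": 3,               # max_depth
    "verbose": 0,
    "random_seed": 123,
    "bootstrap_type": "MVS",  # Minimal Variance Sampling, new in 0.4.2
    "subsample": 0.8,         # assumed mapping of rowsample
}
model = CatBoostClassifier(**params)
```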
--- unifiedbooster-0.3.0/unifiedbooster/gbdt_classification.py
+++ unifiedbooster-0.4.2/unifiedbooster/gbdt_classification.py
@@ -1,12 +1,18 @@
 from .gbdt import GBDT
 from sklearn.base import ClassifierMixin
-from xgboost import XGBClassifier

+try:
+    from xgboost import XGBClassifier
+except:
+    pass
 try:
     from catboost import CatBoostClassifier
 except:
-
-
+    pass
+try:
+    from lightgbm import LGBMClassifier
+except:
+    pass
 from sklearn.ensemble import GradientBoostingClassifier


@@ -25,6 +31,9 @@ class GBDTClassifier(GBDT, ClassifierMixin):
     learning_rate: float
         shrinkage rate; used for reducing the gradient step

+    max_depth: int
+        maximum tree depth
+
     rowsample: float
         subsample ratio of the training instances

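Every backend import is now wrapped in a bare `try/except: pass`, which makes xgboost, catboost, and lightgbm optional but defers failures to first use (a `NameError` when the missing class is referenced). A common, stricter variant of the same pattern, sketched here rather than taken from the package, catches only `ImportError` and keeps an availability flag:

```python
# Sketch of a tightened optional-dependency guard; HAS_XGBOOST and
# make_xgb_classifier are hypothetical names, not part of unifiedbooster.
try:
    from xgboost import XGBClassifier
    HAS_XGBOOST = True
except ImportError:
    HAS_XGBOOST = False

def make_xgb_classifier(**kwargs):
    # Fails loudly with a clear message instead of a NameError at call time.
    if not HAS_XGBOOST:
        raise ImportError("xgboost is not installed; try: pip install xgboost")
    return XGBClassifier(**kwargs)
```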
--- unifiedbooster-0.3.0/unifiedbooster/gbdt_regression.py
+++ unifiedbooster-0.4.2/unifiedbooster/gbdt_regression.py
@@ -1,12 +1,18 @@
 from .gbdt import GBDT
 from sklearn.base import RegressorMixin
-from xgboost import XGBRegressor

+try:
+    from xgboost import XGBRegressor
+except:
+    pass
 try:
     from catboost import CatBoostRegressor
 except:
-
-
+    pass
+try:
+    from lightgbm import LGBMRegressor
+except:
+    pass
 from sklearn.ensemble import GradientBoostingRegressor


@@ -25,6 +31,9 @@ class GBDTRegressor(GBDT, RegressorMixin):
     learning_rate: float
         shrinkage rate; used for reducing the gradient step

+    max_depth: int
+        maximum tree depth
+
     rowsample: float
         subsample ratio of the training instances

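The regression module gets the mirror-image treatment. For completeness, a short hedged usage sketch of the regressor with the dependency-free scikit-learn backend; parameter names follow the diffs above, values are illustrative:

```python
# Minimal sketch: GBDTRegressor on a toy dataset, sklearn backend.
from unifiedbooster import GBDTRegressor
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

reg = GBDTRegressor(
    model_type="gradientboosting",
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
)
print(reg.fit(X_train, y_train).score(X_test, y_test))  # R^2 via RegressorMixin
```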
--- /dev/null
+++ unifiedbooster-0.4.2/unifiedbooster/gpoptimization.py
@@ -0,0 +1,422 @@
+import GPopt as gp
+import nnetsauce as ns
+import numpy as np
+from collections import namedtuple
+from .gbdt_classification import GBDTClassifier
+from .gbdt_regression import GBDTRegressor
+from sklearn.model_selection import cross_val_score
+from sklearn.base import ClassifierMixin, RegressorMixin
+from sklearn.utils import all_estimators
+from sklearn import metrics
+
+
+def cross_val_optim(
+    X_train,
+    y_train,
+    X_test=None,
+    y_test=None,
+    model_type="xgboost",
+    type_fit="classification",
+    scoring="accuracy",
+    n_estimators=100,
+    surrogate_obj=None,
+    cv=5,
+    n_jobs=None,
+    n_init=10,
+    n_iter=190,
+    abs_tol=1e-3,
+    verbose=2,
+    seed=123,
+):
+    """Cross-validation function and hyperparameters' search
+
+    Parameters:
+
+        X_train: array-like,
+            Training vectors, where rows is the number of samples
+            and columns is the number of features.
+
+        y_train: array-like,
+            Training vectors, where rows is the number of samples
+            and columns is the number of features.
+
+        X_test: array-like,
+            Testing vectors, where rows is the number of samples
+            and columns is the number of features.
+
+        y_test: array-like,
+            Testing vectors, where rows is the number of samples
+            and columns is the number of features.
+
+        model_type: str
+            type of gradient boosting algorithm: 'xgboost', 'lightgbm',
+            'catboost', 'gradientboosting'
+
+        type_fit: str
+            "regression" or "classification"
+
+        scoring: str
+            scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
+
+        n_estimators: int
+            maximum number of trees that can be built
+
+        surrogate_obj: an object;
+            An ML model for estimating the uncertainty around the objective function
+
+        cv: int;
+            number of cross-validation folds
+
+        n_jobs: int;
+            number of jobs for parallel execution
+
+        n_init: an integer;
+            number of points in the initial setting, when `x_init` and `y_init` are not provided
+
+        n_iter: an integer;
+            number of iterations of the minimization algorithm
+
+        abs_tol: a float;
+            tolerance for convergence of the optimizer (early stopping based on acquisition function)
+
+        verbose: int
+            controls verbosity
+
+        seed: int
+            reproducibility seed
+
+    Examples:
+
+        ```python
+        import unifiedbooster as ub
+        from sklearn.datasets import load_breast_cancer
+        from sklearn.model_selection import train_test_split
+
+        dataset = load_breast_cancer()
+        X, y = dataset.data, dataset.target
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=0.2, random_state=42
+        )
+
+        res1 = ub.cross_val_optim(
+            X_train,
+            y_train,
+            X_test=None,
+            y_test=None,
+            model_type="lightgbm",
+            type_fit="classification",
+            scoring="accuracy",
+            n_estimators=100,
+            surrogate_obj=None,
+            cv=5,
+            n_jobs=None,
+            n_init=10,
+            n_iter=190,
+            abs_tol=1e-3,
+            verbose=2,
+            seed=123,
+        )
+        print(res1)
+        ```
+    """
+
+    def gbdt_cv(
+        X_train,
+        y_train,
+        model_type="xgboost",
+        n_estimators=100,
+        learning_rate=0.1,
+        max_depth=3,
+        rowsample=1.0,
+        colsample=1.0,
+        cv=5,
+        n_jobs=None,
+        type_fit="classification",
+        scoring="accuracy",
+        seed=123,
+    ):
+        if type_fit == "regression":
+            estimator = GBDTRegressor(
+                model_type=model_type,
+                n_estimators=n_estimators,
+                learning_rate=learning_rate,
+                max_depth=max_depth,
+                rowsample=rowsample,
+                colsample=colsample,
+                verbose=0,
+                seed=seed,
+            )
+        elif type_fit == "classification":
+            estimator = GBDTClassifier(
+                model_type=model_type,
+                n_estimators=n_estimators,
+                learning_rate=learning_rate,
+                max_depth=max_depth,
+                rowsample=rowsample,
+                colsample=colsample,
+                verbose=0,
+                seed=seed,
+            )
+        return -cross_val_score(
+            estimator,
+            X_train,
+            y_train,
+            scoring=scoring,
+            cv=cv,
+            n_jobs=n_jobs,
+            verbose=0,
+        ).mean()
+
+    # objective function for hyperparams tuning
+    def crossval_objective(xx):
+        return gbdt_cv(
+            X_train=X_train,
+            y_train=y_train,
+            model_type=model_type,
+            n_estimators=n_estimators,
+            learning_rate=10 ** xx[0],
+            max_depth=int(xx[1]),
+            rowsample=xx[2],
+            colsample=xx[3],
+            cv=cv,
+            n_jobs=n_jobs,
+            type_fit=type_fit,
+            scoring=scoring,
+            seed=seed,
+        )
+
+    if surrogate_obj is None:
+        gp_opt = gp.GPOpt(
+            objective_func=crossval_objective,
+            lower_bound=np.array([-6, 1, 0.5, 0.5]),
+            upper_bound=np.array([0, 16, 1.0, 1.0]),
+            params_names=[
+                "learning_rate",
+                "max_depth",
+                "rowsample",
+                "colsample",
+            ],
+            method="bayesian",
+            n_init=n_init,
+            n_iter=n_iter,
+            seed=seed,
+        )
+    else:
+        gp_opt = gp.GPOpt(
+            objective_func=crossval_objective,
+            lower_bound=np.array([-6, 1, 0.5, 0.5]),
+            upper_bound=np.array([0, 16, 1.0, 1.0]),
+            params_names=[
+                "learning_rate",
+                "max_depth",
+                "rowsample",
+                "colsample",
+            ],
+            acquisition="ucb",
+            method="splitconformal",
+            surrogate_obj=ns.PredictionInterval(
+                obj=surrogate_obj, method="splitconformal"
+            ),
+            n_init=n_init,
+            n_iter=n_iter,
+            seed=seed,
+        )
+
+    res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
+    res.best_params["model_type"] = model_type
+    res.best_params["n_estimators"] = int(n_estimators)
+    res.best_params["learning_rate"] = 10 ** res.best_params["learning_rate"]
+    res.best_params["max_depth"] = int(res.best_params["max_depth"])
+    res.best_params["rowsample"] = res.best_params["rowsample"]
+    res.best_params["colsample"] = res.best_params["colsample"]
+
+    # out-of-sample error
+    if X_test is not None and y_test is not None:
+        if type_fit == "regression":
+            estimator = GBDTRegressor(**res.best_params, verbose=0, seed=seed)
+        elif type_fit == "classification":
+            estimator = GBDTClassifier(**res.best_params, verbose=0, seed=seed)
+        preds = estimator.fit(X_train, y_train).predict(X_test)
+        # check error on y_test
+        oos_err = getattr(metrics, scoring + "_score")(
+            y_true=y_test, y_pred=preds
+        )
+        result = namedtuple("result", res._fields + ("test_" + scoring,))
+        return result(*res, oos_err)
+    else:
+        return res
+
+
+def lazy_cross_val_optim(
+    X_train,
+    y_train,
+    X_test=None,
+    y_test=None,
+    model_type="xgboost",
+    type_fit="classification",
+    scoring="accuracy",
+    customize=False,
+    n_estimators=100,
+    cv=5,
+    n_jobs=None,
+    n_init=10,
+    n_iter=190,
+    abs_tol=1e-3,
+    verbose=1,
+    seed=123,
+):
+    """Automated Cross-validation function and hyperparameters' search using multiple surrogates
+
+    Parameters:
+
+        X_train: array-like,
+            Training vectors, where rows is the number of samples
+            and columns is the number of features.
+
+        y_train: array-like,
+            Training vectors, where rows is the number of samples
+            and columns is the number of features.
+
+        X_test: array-like,
+            Testing vectors, where rows is the number of samples
+            and columns is the number of features.
+
+        y_test: array-like,
+            Testing vectors, where rows is the number of samples
+            and columns is the number of features.
+
+        model_type: str
+            type of gradient boosting algorithm: 'xgboost', 'lightgbm',
+            'catboost', 'gradientboosting'
+
+        type_fit: str
+            "regression" or "classification"
+
+        scoring: str
+            scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
+
+        customize: boolean
+            if True, the surrogate is transformed into a quasi-randomized network (default is False)
+
+        n_estimators: int
+            maximum number of trees that can be built
+
+        cv: int;
+            number of cross-validation folds
+
+        n_jobs: int;
+            number of jobs for parallel execution
+
+        n_init: an integer;
+            number of points in the initial setting, when `x_init` and `y_init` are not provided
+
+        n_iter: an integer;
+            number of iterations of the minimization algorithm
+
+        abs_tol: a float;
+            tolerance for convergence of the optimizer (early stopping based on acquisition function)
+
+        verbose: int
+            controls verbosity
+
+        seed: int
+            reproducibility seed
+
+    Examples:
+
+        ```python
+        import unifiedbooster as ub
+        from sklearn.datasets import load_breast_cancer
+        from sklearn.model_selection import train_test_split
+
+        dataset = load_breast_cancer()
+        X, y = dataset.data, dataset.target
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=0.2, random_state=42
+        )
+
+        res1 = ub.cross_val_optim(
+            X_train,
+            y_train,
+            X_test=None,
+            y_test=None,
+            model_type="lightgbm",
+            type_fit="classification",
+            scoring="accuracy",
+            n_estimators=100,
+            surrogate_obj=None,
+            cv=5,
+            n_jobs=None,
+            n_init=10,
+            n_iter=190,
+            abs_tol=1e-3,
+            verbose=2,
+            seed=123,
+        )
+        print(res1)
+        ```
+    """
+
+    removed_regressors = [
+        "TheilSenRegressor",
+        "ARDRegression",
+        "CCA",
+        "GaussianProcessRegressor",
+        "GradientBoostingRegressor",
+        "HistGradientBoostingRegressor",
+        "IsotonicRegression",
+        "MultiOutputRegressor",
+        "MultiTaskElasticNet",
+        "MultiTaskElasticNetCV",
+        "MultiTaskLasso",
+        "MultiTaskLassoCV",
+        "OrthogonalMatchingPursuit",
+        "OrthogonalMatchingPursuitCV",
+        "PLSCanonical",
+        "PLSRegression",
+        "RadiusNeighborsRegressor",
+        "RegressorChain",
+        "StackingRegressor",
+        "VotingRegressor",
+    ]
+
+    results = []
+
+    for est in all_estimators():
+        if issubclass(est[1], RegressorMixin) and (
+            est[0] not in removed_regressors
+        ):
+            try:
+                if customize == True:
+                    print(f"\n surrogate: CustomRegressor({est[0]})")
+                    surr_obj = ns.CustomRegressor(obj=est[1]())
+                else:
+                    print(f"\n surrogate: {est[0]}")
+                    surr_obj = est[1]()
+                res = cross_val_optim(
+                    X_train=X_train,
+                    y_train=y_train,
+                    X_test=X_test,
+                    y_test=y_test,
+                    model_type=model_type,
+                    n_estimators=n_estimators,
+                    surrogate_obj=surr_obj,
+                    cv=cv,
+                    n_jobs=n_jobs,
+                    type_fit=type_fit,
+                    scoring=scoring,
+                    n_init=n_init,
+                    n_iter=n_iter,
+                    abs_tol=abs_tol,
+                    verbose=verbose,
+                    seed=seed,
+                )
+                print(f"\n result: {res}")
+                if customize == True:
+                    results.append((f"CustomRegressor({est[0]})", res))
+                else:
+                    results.append((est[0], res))
+            except:
+                pass
+
+    return results
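`lazy_cross_val_optim` loops over (nearly) all scikit-learn regressors, using each in turn as the surrogate model for the conformalized Bayesian optimizer, and returns a list of `(surrogate_name, result)` pairs. A hedged sketch of running it on a small budget and ranking surrogates by held-out accuracy; the `test_accuracy` field exists because `cross_val_optim` appends `"test_" + scoring` to the result namedtuple when test data is supplied:

```python
# Sketch: small-budget lazy search; assumes GPopt and nnetsauce import cleanly.
import unifiedbooster as ub
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

results = ub.lazy_cross_val_optim(
    X_train,
    y_train,
    X_test=X_test,
    y_test=y_test,
    model_type="lightgbm",
    type_fit="classification",
    scoring="accuracy",
    n_init=5,
    n_iter=20,  # deliberately small budget for a quick pass
    verbose=0,
)

# Rank surrogates by out-of-sample accuracy (higher is better).
results.sort(key=lambda pair: pair[1].test_accuracy, reverse=True)
for name, res in results[:5]:
    print(name, res.test_accuracy)
```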
--- unifiedbooster-0.3.0/unifiedbooster.egg-info/PKG-INFO
+++ unifiedbooster-0.4.2/unifiedbooster.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unifiedbooster
-Version: 0.3.0
+Version: 0.4.2
 Summary: Unified interface for Gradient Boosted Decision Trees
 Home-page: https://github.com/thierrymoudiki/unifiedbooster
 Author: T. Moudiki
@@ -24,5 +24,6 @@ Requires-Dist: xgboost
 Requires-Dist: lightgbm
 Requires-Dist: catboost
 Requires-Dist: GPopt
+Requires-Dist: nnetsauce

 Unified interface for Gradient Boosted Decision Trees
--- unifiedbooster-0.3.0/unifiedbooster.egg-info/SOURCES.txt
+++ unifiedbooster-0.4.2/unifiedbooster.egg-info/SOURCES.txt
@@ -5,6 +5,7 @@ unifiedbooster/__init__.py
 unifiedbooster/gbdt.py
 unifiedbooster/gbdt_classification.py
 unifiedbooster/gbdt_regression.py
+unifiedbooster/gpoptimization.py
 unifiedbooster.egg-info/PKG-INFO
 unifiedbooster.egg-info/SOURCES.txt
 unifiedbooster.egg-info/dependency_links.txt
Files without changes (7):

- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/LICENSE
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/README.md
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/setup.cfg
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/unifiedbooster.egg-info/dependency_links.txt
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/unifiedbooster.egg-info/entry_points.txt
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/unifiedbooster.egg-info/not-zip-safe
- {unifiedbooster-0.3.0 → unifiedbooster-0.4.2}/unifiedbooster.egg-info/top_level.txt