unifiedbooster 0.4.2__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry; it is provided for informational purposes only.
- unifiedbooster/gbdt.py +3 -1
- unifiedbooster/gbdt_classification.py +36 -36
- unifiedbooster/gbdt_regression.py +36 -36
- unifiedbooster/gpoptimization.py +127 -63
- unifiedbooster/nonconformist/__init__.py +30 -0
- unifiedbooster/nonconformist/acp.py +381 -0
- unifiedbooster/nonconformist/base.py +156 -0
- unifiedbooster/nonconformist/cp.py +172 -0
- unifiedbooster/nonconformist/evaluation.py +486 -0
- unifiedbooster/nonconformist/icp.py +442 -0
- unifiedbooster/nonconformist/nc.py +610 -0
- unifiedbooster/nonconformist/util.py +9 -0
- {unifiedbooster-0.4.2.dist-info → unifiedbooster-0.6.0.dist-info}/METADATA +1 -1
- unifiedbooster-0.6.0.dist-info/RECORD +19 -0
- unifiedbooster-0.4.2.dist-info/RECORD +0 -11
- {unifiedbooster-0.4.2.dist-info → unifiedbooster-0.6.0.dist-info}/LICENSE +0 -0
- {unifiedbooster-0.4.2.dist-info → unifiedbooster-0.6.0.dist-info}/WHEEL +0 -0
- {unifiedbooster-0.4.2.dist-info → unifiedbooster-0.6.0.dist-info}/entry_points.txt +0 -0
- {unifiedbooster-0.4.2.dist-info → unifiedbooster-0.6.0.dist-info}/top_level.txt +0 -0
unifiedbooster/gbdt.py  CHANGED

@@ -90,7 +90,9 @@ class GBDT(BaseEstimator):
                 "depth": self.max_depth,
                 "verbose": self.verbose,
                 "random_seed": self.seed,
-                "
+                "boosting_type": "Plain",
+                "leaf_estimation_iterations": 1,
+                "bootstrap_type": "Bernoulli",
                 **kwargs,
             }
         elif self.model_type == "gradientboosting":
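Aside on the three new CatBoost keys: they are standard `catboost` constructor options that trade a little accuracy for faster, simpler fits. A minimal sketch of the equivalent direct construction, with hypothetical values for the fields that come from the estimator's attributes:

```python
# Sketch only: mirrors the parameter dict assembled in gbdt.py;
# the depth/verbose/seed values are hypothetical, not from this diff.
from catboost import CatBoostClassifier

clf = CatBoostClassifier(
    depth=3,                       # from self.max_depth
    verbose=0,                     # from self.verbose
    random_seed=123,               # from self.seed
    boosting_type="Plain",         # plain scheme instead of the "Ordered" default used on small data
    leaf_estimation_iterations=1,  # a single gradient step when computing leaf values
    bootstrap_type="Bernoulli",    # row subsampling via an independent Bernoulli draw per row
)
```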
unifiedbooster/gbdt_classification.py  CHANGED

@@ -51,42 +51,42 @@ class GBDTClassifier(GBDT, ClassifierMixin):
 
     Examples:
 
-    [36 removed lines; their content is not rendered in the diff viewer]
+    ```python
+    import unifiedbooster as ub
+    from sklearn.datasets import load_iris
+    from sklearn.model_selection import train_test_split
+    from sklearn.metrics import accuracy_score
+
+    # Load dataset
+    iris = load_iris()
+    X, y = iris.data, iris.target
+
+    # Split dataset into training and testing sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+    # Initialize the unified classifiers (example with XGBoost and LightGBM)
+    classifier1 = ub.GBDTClassifier(model_type='xgboost')
+    #classifier2 = ub.GBDTClassifier(model_type='catboost')
+    classifier3 = ub.GBDTClassifier(model_type='lightgbm')
+
+    # Fit the models
+    classifier1.fit(X_train, y_train)
+    #classifier2.fit(X_train, y_train)
+    classifier3.fit(X_train, y_train)
+
+    # Predict on the test set
+    y_pred1 = classifier1.predict(X_test)
+    #y_pred2 = classifier2.predict(X_test)
+    y_pred3 = classifier3.predict(X_test)
+
+    # Evaluate the models
+    accuracy1 = accuracy_score(y_test, y_pred1)
+    #accuracy2 = accuracy_score(y_test, y_pred2)
+    accuracy3 = accuracy_score(y_test, y_pred3)
+    print(f"Classification Accuracy xgboost: {accuracy1:.2f}")
+    #print(f"Classification Accuracy catboost: {accuracy2:.2f}")
+    print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
+    ```
     """
 
     def __init__(
unifiedbooster/gbdt_regression.py  CHANGED

@@ -51,42 +51,42 @@ class GBDTRegressor(GBDT, RegressorMixin):
 
     Examples:
 
-    [36 removed lines; their content is not rendered in the diff viewer]
+    ```python
+    import unifiedbooster as ub
+    from sklearn.datasets import fetch_california_housing
+    from sklearn.model_selection import train_test_split
+    from sklearn.metrics import mean_squared_error
+
+    # Load dataset
+    housing = fetch_california_housing()
+    X, y = housing.data, housing.target
+
+    # Split dataset into training and testing sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+    # Initialize the unified regressors (example with XGBoost and LightGBM)
+    regressor1 = ub.GBDTRegressor(model_type='xgboost')
+    #regressor2 = ub.GBDTRegressor(model_type='catboost')
+    regressor3 = ub.GBDTRegressor(model_type='lightgbm')
+
+    # Fit the models
+    regressor1.fit(X_train, y_train)
+    #regressor2.fit(X_train, y_train)
+    regressor3.fit(X_train, y_train)
+
+    # Predict on the test set
+    y_pred1 = regressor1.predict(X_test)
+    #y_pred2 = regressor2.predict(X_test)
+    y_pred3 = regressor3.predict(X_test)
+
+    # Evaluate the models
+    mse1 = mean_squared_error(y_test, y_pred1)
+    #mse2 = mean_squared_error(y_test, y_pred2)
+    mse3 = mean_squared_error(y_test, y_pred3)
+    print(f"Regression Mean Squared Error xgboost: {mse1:.2f}")
+    #print(f"Regression Mean Squared Error catboost: {mse2:.2f}")
+    print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}")
+    ```
     """
 
     def __init__(
unifiedbooster/gpoptimization.py  CHANGED

@@ -18,7 +18,7 @@ def cross_val_optim(
     model_type="xgboost",
     type_fit="classification",
     scoring="accuracy",
-    n_estimators=
+    n_estimators=None,
     surrogate_obj=None,
     cv=5,
     n_jobs=None,
@@ -59,7 +59,7 @@ def cross_val_optim(
         scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
 
     n_estimators: int
-        maximum number of trees that can be built
+        maximum number of trees that can be built (default is None; if None, this parameter is tuned)
 
     surrogate_obj: an object;
         An ML model for estimating the uncertainty around the objective function
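Given the new default, a hedged sketch of calling `cross_val_optim` with the tree count left to the tuner (keyword names are the ones visible in this diff; the positional `X_train, y_train` ordering is assumed):

```python
import unifiedbooster as ub
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# With n_estimators=None (the new default), the number of trees joins the search space
res = ub.cross_val_optim(
    X_train,
    y_train,
    model_type="xgboost",
    type_fit="classification",
    scoring="accuracy",
    n_estimators=None,
    cv=5,
    seed=123,
)
print(res.best_params)  # now also contains a tuned, decoded n_estimators
```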
@@ -168,63 +168,121 @@ def cross_val_optim(
     ).mean()
 
     # objective function for hyperparams tuning
-    [16 removed lines; their content is not rendered in the diff viewer]
+    if n_estimators is not None:
+        def crossval_objective(xx):
+            return gbdt_cv(
+                X_train=X_train,
+                y_train=y_train,
+                model_type=model_type,
+                n_estimators=n_estimators,
+                learning_rate=10 ** xx[0],
+                max_depth=int(xx[1]),
+                rowsample=xx[2],
+                colsample=xx[3],
+                cv=cv,
+                n_jobs=n_jobs,
+                type_fit=type_fit,
+                scoring=scoring,
+                seed=seed,
+            )
+    else:  # n_estimators is None
+        def crossval_objective(xx):
+            return gbdt_cv(
+                X_train=X_train,
+                y_train=y_train,
+                model_type=model_type,
+                n_estimators=int(10 ** xx[4]),
+                learning_rate=10 ** xx[0],
+                max_depth=int(xx[1]),
+                rowsample=xx[2],
+                colsample=xx[3],
+                cv=cv,
+                n_jobs=n_jobs,
+                type_fit=type_fit,
+                scoring=scoring,
+                seed=seed,
+            )
 
-    if
-    [35 further removed lines; their content is not rendered in the diff viewer]
+    if n_estimators is not None:
+        if surrogate_obj is None:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5]),
+                upper_bound=np.array([0, 16, 1.0, 1.0]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                ],
+                method="bayesian",
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
+        else:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5]),
+                upper_bound=np.array([0, 16, 1.0, 1.0]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                ],
+                acquisition="ucb",
+                method="splitconformal",
+                surrogate_obj=ns.PredictionInterval(
+                    obj=surrogate_obj, method="splitconformal"
+                ),
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
+    else:  # n_estimators is None
+        if surrogate_obj is None:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5, 2]),
+                upper_bound=np.array([0, 16, 1.0, 1.0, 3]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                    "n_estimators"
+                ],
+                method="bayesian",
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
+        else:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5, 2]),
+                upper_bound=np.array([0, 16, 1.0, 1.0, 3]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                    "n_estimators"
+                ],
+                acquisition="ucb",
+                method="splitconformal",
+                surrogate_obj=ns.PredictionInterval(
+                    obj=surrogate_obj, method="splitconformal"
+                ),
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
 
     res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
     res.best_params["model_type"] = model_type
-    res.best_params["n_estimators"] = int(n_estimators)
+    res.best_params["n_estimators"] = int(n_estimators) if n_estimators is not None else int(10 ** res.best_params["n_estimators"])
     res.best_params["learning_rate"] = 10 ** res.best_params["learning_rate"]
     res.best_params["max_depth"] = int(res.best_params["max_depth"])
     res.best_params["rowsample"] = res.best_params["rowsample"]
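The two branches differ only in the optional fifth search coordinate, and every coordinate is decoded before being handed to `gbdt_cv`. A small illustrative sketch of that decoding under the bounds above (the candidate vector `xx` itself would come from the GPOpt optimizer, not from user code):

```python
import numpy as np

# Bounds when n_estimators is tuned: lower [-6, 1, 0.5, 0.5, 2], upper [0, 16, 1.0, 1.0, 3]
xx = np.array([-2.0, 7.3, 0.8, 0.9, 2.5])  # an illustrative candidate, not real optimizer output

learning_rate = 10 ** xx[0]      # 0.01 -- searched on a log scale over [1e-6, 1]
max_depth = int(xx[1])           # 7
rowsample = xx[2]                # 0.8
colsample = xx[3]                # 0.9
n_estimators = int(10 ** xx[4])  # 316 -- i.e. between 10**2 = 100 and 10**3 = 1000 trees
```

The same decoding is applied to `res.best_params` after optimization, which is why the `n_estimators` entry is exponentiated only when it was tuned.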
@@ -256,7 +314,7 @@ def lazy_cross_val_optim(
     type_fit="classification",
     scoring="accuracy",
     customize=False,
-    n_estimators=
+    n_estimators=None,
     cv=5,
     n_jobs=None,
     n_init=10,
@@ -299,7 +357,7 @@ def lazy_cross_val_optim(
         if True, the surrogate is transformed into a quasi-randomized network (default is False)
 
     n_estimators: int
-        maximum number of trees that can be built
+        maximum number of trees that can be built (default is None; if None, this parameter is tuned)
 
     cv: int;
         number of cross-validation folds
@@ -325,9 +383,14 @@ def lazy_cross_val_optim(
     Examples:
 
     ```python
+    import os
     import unifiedbooster as ub
     from sklearn.datasets import load_breast_cancer
     from sklearn.model_selection import train_test_split
+    from sklearn.metrics import accuracy_score
+    from time import time
+
+    print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
 
     dataset = load_breast_cancer()
     X, y = dataset.data, dataset.target

@@ -335,25 +398,26 @@ def lazy_cross_val_optim(
         X, y, test_size=0.2, random_state=42
     )
 
-    [1 removed line; its content is not rendered in the diff viewer]
+    start = time()
+    res4 = ub.lazy_cross_val_optim(
         X_train,
         y_train,
-        X_test=
-        y_test=
+        X_test=X_test,
+        y_test=y_test,
         model_type="lightgbm",
         type_fit="classification",
         scoring="accuracy",
         n_estimators=100,
-        surrogate_obj=None,
         cv=5,
         n_jobs=None,
         n_init=10,
         n_iter=190,
         abs_tol=1e-3,
-        verbose=2,
         seed=123,
+        customize=False
     )
-    print(
+    print(f"Elapsed: {time()-start}")
+    print(res4)
     ```
     """
unifiedbooster/nonconformist/__init__.py  ADDED

@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+
+"""
+docstring
+"""
+
+# Authors: Henrik Linusson
+# Yaniv Romano modified np.py file to include CQR
+# T. Moudiki modified __init__.py to import classes
+
+# __version__ = '2.1.0'
+
+from .nc import (
+    AbsErrorErrFunc,
+    QuantileRegErrFunc,
+    RegressorNc,
+    RegressorNormalizer,
+)
+from .cp import IcpRegressor, TcpClassifier
+from .icp import IcpClassifier
+from .base import RegressorAdapter
+
+__all__ = [
+    "AbsErrorErrFunc",
+    "QuantileRegErrFunc",
+    "RegressorAdapter",
+    "RegressorNc",
+    "RegressorNormalizer",
+    "IcpRegressor",
+]
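The new `unifiedbooster/nonconformist` subpackage (vendored from Henrik Linusson's nonconformist, per the header comments) provides conformal-prediction utilities. For orientation, a hedged sketch of the classic inductive conformal regression workflow these exports support, assuming the vendored copy keeps the upstream `fit`/`calibrate`/`predict` interface (the base regressor and split sizes below are illustrative, not from this diff):

```python
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from unifiedbooster.nonconformist import (
    AbsErrorErrFunc, IcpRegressor, RegressorAdapter, RegressorNc,
)

X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
# Split the training data into a proper training set and a calibration set
X_fit, X_cal, y_fit, y_cal = train_test_split(X_train, y_train, random_state=42)

# Nonconformity score = absolute residual of the wrapped model
nc = RegressorNc(RegressorAdapter(RandomForestRegressor()), AbsErrorErrFunc())
icp = IcpRegressor(nc)
icp.fit(X_fit, y_fit)          # fit the underlying model on the proper training split
icp.calibrate(X_cal, y_cal)    # compute calibration scores on held-out data
intervals = icp.predict(X_test, significance=0.1)  # ~90% prediction intervals, shape (n, 2)
print(intervals[:3])
```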