unifiedbooster 0.4.2__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
unifiedbooster/gbdt.py CHANGED
@@ -90,7 +90,9 @@ class GBDT(BaseEstimator):
                 "depth": self.max_depth,
                 "verbose": self.verbose,
                 "random_seed": self.seed,
-                "bootstrap_type": "MVS",
+                "boosting_type": "Plain",
+                "leaf_estimation_iterations": 1,
+                "bootstrap_type": "Bernoulli",
                 **kwargs,
             }
         elif self.model_type == "gradientboosting":
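For context, these keys are handed straight to CatBoost's constructor: 0.6.0 switches the defaults to plain (non-ordered) boosting, a single leaf-estimation step per tree, and Bernoulli row sampling in place of MVS, presumably so row subsampling behaves the same way across backends. A minimal sketch of the resulting configuration — the three changed keys come from the hunk above, while the literal values for the surrounding keys are placeholders:

```python
# Hedged sketch, not unifiedbooster's exact code: roughly the CatBoost
# model the new defaults configure for model_type="catboost".
from catboost import CatBoostClassifier

model = CatBoostClassifier(
    depth=3,                       # GBDT's max_depth (placeholder value)
    verbose=0,                     # GBDT's verbose (placeholder value)
    random_seed=123,               # GBDT's seed (placeholder value)
    boosting_type="Plain",         # new in 0.6.0: plain instead of ordered boosting
    leaf_estimation_iterations=1,  # new in 0.6.0: one leaf-estimation step per tree
    bootstrap_type="Bernoulli",    # new in 0.6.0: replaces "MVS" row sampling
)
```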
@@ -51,42 +51,42 @@ class GBDTClassifier(GBDT, ClassifierMixin):
 
     Examples:
 
-    ```python
-    import unifiedbooster as ub
-    from sklearn.datasets import load_iris
-    from sklearn.model_selection import train_test_split
-    from sklearn.metrics import accuracy_score
-
-    # Load dataset
-    iris = load_iris()
-    X, y = iris.data, iris.target
-
-    # Split dataset into training and testing sets
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-
-    # Initialize the unified classifier (example with XGBoost)
-    regressor1 = ub.GBDTClassifier(model_type='xgboost')
-    #regressor2 = ub.GBDTClassifier(model_type='catboost')
-    regressor3 = ub.GBDTClassifier(model_type='lightgbm')
-
-    # Fit the model
-    regressor1.fit(X_train, y_train)
-    #regressor2.fit(X_train, y_train)
-    regressor3.fit(X_train, y_train)
-
-    # Predict on the test set
-    y_pred1 = regressor1.predict(X_test)
-    #y_pred2 = regressor2.predict(X_test)
-    y_pred3 = regressor3.predict(X_test)
-
-    # Evaluate the model
-    accuracy1 = accuracy_score(y_test, y_pred1)
-    #accuracy2 = accuracy_score(y_test, y_pred2)
-    accuracy3 = accuracy_score(y_test, y_pred3)
-    print(f"Classification Accuracy xgboost: {accuracy1:.2f}")
-    #print(f"Classification Accuracy catboost: {accuracy2:.2f}")
-    print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
-    ```
+    ```python
+    import unifiedbooster as ub
+    from sklearn.datasets import load_iris
+    from sklearn.model_selection import train_test_split
+    from sklearn.metrics import accuracy_score
+
+    # Load dataset
+    iris = load_iris()
+    X, y = iris.data, iris.target
+
+    # Split dataset into training and testing sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+    # Initialize the unified classifier (example with XGBoost)
+    regressor1 = ub.GBDTClassifier(model_type='xgboost')
+    #regressor2 = ub.GBDTClassifier(model_type='catboost')
+    regressor3 = ub.GBDTClassifier(model_type='lightgbm')
+
+    # Fit the model
+    regressor1.fit(X_train, y_train)
+    #regressor2.fit(X_train, y_train)
+    regressor3.fit(X_train, y_train)
+
+    # Predict on the test set
+    y_pred1 = regressor1.predict(X_test)
+    #y_pred2 = regressor2.predict(X_test)
+    y_pred3 = regressor3.predict(X_test)
+
+    # Evaluate the model
+    accuracy1 = accuracy_score(y_test, y_pred1)
+    #accuracy2 = accuracy_score(y_test, y_pred2)
+    accuracy3 = accuracy_score(y_test, y_pred3)
+    print(f"Classification Accuracy xgboost: {accuracy1:.2f}")
+    #print(f"Classification Accuracy catboost: {accuracy2:.2f}")
+    print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
+    ```
     """
 
     def __init__(
@@ -51,42 +51,42 @@ class GBDTRegressor(GBDT, RegressorMixin):
 
     Examples:
 
-    ```python
-    import unifiedbooster as ub
-    from sklearn.datasets import fetch_california_housing
-    from sklearn.model_selection import train_test_split
-    from sklearn.metrics import mean_squared_error
-
-    # Load dataset
-    housing = fetch_california_housing()
-    X, y = housing.data, housing.target
-
-    # Split dataset into training and testing sets
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-
-    # Initialize the unified regressor (example with XGBoost)
-    regressor1 = ub.GBDTRegressor(model_type='xgboost')
-    #regressor2 = ub.GBDTRegressor(model_type='catboost')
-    regressor3 = ub.GBDTRegressor(model_type='lightgbm')
-
-    # Fit the model
-    regressor1.fit(X_train, y_train)
-    #regressor2.fit(X_train, y_train)
-    regressor3.fit(X_train, y_train)
-
-    # Predict on the test set
-    y_pred1 = regressor1.predict(X_test)
-    #y_pred2 = regressor2.predict(X_test)
-    y_pred3 = regressor3.predict(X_test)
-
-    # Evaluate the model
-    mse1 = mean_squared_error(y_test, y_pred1)
-    #mse2 = mean_squared_error(y_test, y_pred2)
-    mse3 = mean_squared_error(y_test, y_pred3)
-    print(f"Regression Mean Squared Error xgboost: {mse1:.2f}")
-    #print(f"Regression Mean Squared Error catboost: {mse2:.2f}")
-    print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}")
-    ```
+    ```python
+    import unifiedbooster as ub
+    from sklearn.datasets import fetch_california_housing
+    from sklearn.model_selection import train_test_split
+    from sklearn.metrics import mean_squared_error
+
+    # Load dataset
+    housing = fetch_california_housing()
+    X, y = housing.data, housing.target
+
+    # Split dataset into training and testing sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+    # Initialize the unified regressor (example with XGBoost)
+    regressor1 = ub.GBDTRegressor(model_type='xgboost')
+    #regressor2 = ub.GBDTRegressor(model_type='catboost')
+    regressor3 = ub.GBDTRegressor(model_type='lightgbm')
+
+    # Fit the model
+    regressor1.fit(X_train, y_train)
+    #regressor2.fit(X_train, y_train)
+    regressor3.fit(X_train, y_train)
+
+    # Predict on the test set
+    y_pred1 = regressor1.predict(X_test)
+    #y_pred2 = regressor2.predict(X_test)
+    y_pred3 = regressor3.predict(X_test)
+
+    # Evaluate the model
+    mse1 = mean_squared_error(y_test, y_pred1)
+    #mse2 = mean_squared_error(y_test, y_pred2)
+    mse3 = mean_squared_error(y_test, y_pred3)
+    print(f"Regression Mean Squared Error xgboost: {mse1:.2f}")
+    #print(f"Regression Mean Squared Error catboost: {mse2:.2f}")
+    print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}")
+    ```
     """
 
     def __init__(
@@ -18,7 +18,7 @@ def cross_val_optim(
     model_type="xgboost",
     type_fit="classification",
     scoring="accuracy",
-    n_estimators=100,
+    n_estimators=None,
     surrogate_obj=None,
     cv=5,
     n_jobs=None,
@@ -59,7 +59,7 @@ def cross_val_optim(
         scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
 
     n_estimators: int
-        maximum number of trees that can be built
+        maximum number of trees that can be built (default is None; if None, this parameter is tuned)
 
     surrogate_obj: an object;
         An ML model for estimating the uncertainty around the objective function
@@ -168,63 +168,121 @@ def cross_val_optim(
     ).mean()
 
     # objective function for hyperparams tuning
-    def crossval_objective(xx):
-        return gbdt_cv(
-            X_train=X_train,
-            y_train=y_train,
-            model_type=model_type,
-            n_estimators=n_estimators,
-            learning_rate=10 ** xx[0],
-            max_depth=int(xx[1]),
-            rowsample=xx[2],
-            colsample=xx[3],
-            cv=cv,
-            n_jobs=n_jobs,
-            type_fit=type_fit,
-            scoring=scoring,
-            seed=seed,
-        )
+    if n_estimators is not None:
+        def crossval_objective(xx):
+            return gbdt_cv(
+                X_train=X_train,
+                y_train=y_train,
+                model_type=model_type,
+                n_estimators=n_estimators,
+                learning_rate=10 ** xx[0],
+                max_depth=int(xx[1]),
+                rowsample=xx[2],
+                colsample=xx[3],
+                cv=cv,
+                n_jobs=n_jobs,
+                type_fit=type_fit,
+                scoring=scoring,
+                seed=seed,
+            )
+    else:  # n_estimators is None
+        def crossval_objective(xx):
+            return gbdt_cv(
+                X_train=X_train,
+                y_train=y_train,
+                model_type=model_type,
+                n_estimators=int(10 ** xx[4]),
+                learning_rate=10 ** xx[0],
+                max_depth=int(xx[1]),
+                rowsample=xx[2],
+                colsample=xx[3],
+                cv=cv,
+                n_jobs=n_jobs,
+                type_fit=type_fit,
+                scoring=scoring,
+                seed=seed,
+            )
 
-    if surrogate_obj is None:
-        gp_opt = gp.GPOpt(
-            objective_func=crossval_objective,
-            lower_bound=np.array([-6, 1, 0.5, 0.5]),
-            upper_bound=np.array([0, 16, 1.0, 1.0]),
-            params_names=[
-                "learning_rate",
-                "max_depth",
-                "rowsample",
-                "colsample",
-            ],
-            method="bayesian",
-            n_init=n_init,
-            n_iter=n_iter,
-            seed=seed,
-        )
-    else:
-        gp_opt = gp.GPOpt(
-            objective_func=crossval_objective,
-            lower_bound=np.array([-6, 1, 0.5, 0.5]),
-            upper_bound=np.array([0, 16, 1.0, 1.0]),
-            params_names=[
-                "learning_rate",
-                "max_depth",
-                "rowsample",
-                "colsample",
-            ],
-            acquisition="ucb",
-            method="splitconformal",
-            surrogate_obj=ns.PredictionInterval(
-                obj=surrogate_obj, method="splitconformal"
-            ),
-            n_init=n_init,
-            n_iter=n_iter,
-            seed=seed,
-        )
+    if n_estimators is not None:
+        if surrogate_obj is None:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5]),
+                upper_bound=np.array([0, 16, 1.0, 1.0]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                ],
+                method="bayesian",
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
+        else:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5]),
+                upper_bound=np.array([0, 16, 1.0, 1.0]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                ],
+                acquisition="ucb",
+                method="splitconformal",
+                surrogate_obj=ns.PredictionInterval(
+                    obj=surrogate_obj, method="splitconformal"
+                ),
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
+    else:  # n_estimators is None
+        if surrogate_obj is None:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5, 2]),
+                upper_bound=np.array([0, 16, 1.0, 1.0, 3]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                    "n_estimators"
+                ],
+                method="bayesian",
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
+        else:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5, 2]),
+                upper_bound=np.array([0, 16, 1.0, 1.0, 3]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                    "n_estimators"
+                ],
+                acquisition="ucb",
+                method="splitconformal",
+                surrogate_obj=ns.PredictionInterval(
+                    obj=surrogate_obj, method="splitconformal"
+                ),
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
 
     res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
     res.best_params["model_type"] = model_type
-    res.best_params["n_estimators"] = int(n_estimators)
+    res.best_params["n_estimators"] = int(n_estimators) if n_estimators is not None else int(10 ** res.best_params["n_estimators"])
     res.best_params["learning_rate"] = 10 ** res.best_params["learning_rate"]
     res.best_params["max_depth"] = int(res.best_params["max_depth"])
     res.best_params["rowsample"] = res.best_params["rowsample"]
@@ -256,7 +314,7 @@ def lazy_cross_val_optim(
     type_fit="classification",
     scoring="accuracy",
     customize=False,
-    n_estimators=100,
+    n_estimators=None,
     cv=5,
     n_jobs=None,
     n_init=10,
@@ -299,7 +357,7 @@ def lazy_cross_val_optim(
         if True, the surrogate is transformed into a quasi-randomized network (default is False)
 
     n_estimators: int
-        maximum number of trees that can be built
+        maximum number of trees that can be built (default is None; if None, this parameter is tuned)
 
     cv: int;
         number of cross-validation folds
@@ -325,9 +383,14 @@ def lazy_cross_val_optim(
     Examples:
 
     ```python
+    import os
     import unifiedbooster as ub
     from sklearn.datasets import load_breast_cancer
     from sklearn.model_selection import train_test_split
+    from sklearn.metrics import accuracy_score
+    from time import time
+
+    print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
 
     dataset = load_breast_cancer()
     X, y = dataset.data, dataset.target
@@ -335,25 +398,26 @@ def lazy_cross_val_optim(
         X, y, test_size=0.2, random_state=42
     )
 
-    res1 = ub.cross_val_optim(
+    start = time()
+    res4 = ub.lazy_cross_val_optim(
         X_train,
         y_train,
-        X_test=None,
-        y_test=None,
+        X_test=X_test,
+        y_test=y_test,
         model_type="lightgbm",
         type_fit="classification",
         scoring="accuracy",
         n_estimators=100,
-        surrogate_obj=None,
         cv=5,
         n_jobs=None,
         n_init=10,
         n_iter=190,
         abs_tol=1e-3,
-        verbose=2,
         seed=123,
+        customize=False
     )
-    print(res1)
+    print(f"Elapsed: {time()-start}")
+    print(res4)
     ```
     """
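Since both tuners now default to `n_estimators=None`, the explicit `n_estimators=100` above can simply be dropped to let the optimizer choose the tree count as well. A minimal sketch using the same breast-cancer data as the docstring; the keyword values other than `n_estimators` are illustrative:

```python
# Hedged sketch: n_estimators=None (the new default) adds the tree count,
# searched over roughly 100-1000 trees, to the tuned hyperparameters.
import unifiedbooster as ub
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

res = ub.cross_val_optim(
    X_train,
    y_train,
    model_type="lightgbm",
    type_fit="classification",
    scoring="accuracy",
    n_estimators=None,  # tuned instead of fixed
    cv=5,
    seed=123,
)
print(res.best_params)  # includes the decoded integer "n_estimators"
```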
 
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+
+"""
+docstring
+"""
+
+# Authors: Henrik Linusson
+# Yaniv Romano modified np.py file to include CQR
+# T. Moudiki modified __init__.py to import classes
+
+# __version__ = '2.1.0'
+
+from .nc import (
+    AbsErrorErrFunc,
+    QuantileRegErrFunc,
+    RegressorNc,
+    RegressorNormalizer,
+)
+from .cp import IcpRegressor, TcpClassifier
+from .icp import IcpClassifier
+from .base import RegressorAdapter
+
+__all__ = [
+    "AbsErrorErrFunc",
+    "QuantileRegErrFunc",
+    "RegressorAdapter",
+    "RegressorNc",
+    "RegressorNormalizer",
+    "IcpRegressor",
+]
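This added file is the `__init__.py` of a vendored copy of Henrik Linusson's nonconformist package (modified, per the header comments, for CQR and these imports); the file's path is not shown in this diff excerpt. Assuming the vendored classes keep upstream nonconformist's interface, a hedged sketch of how the exported pieces compose for conformal prediction intervals — `unifiedbooster.nonconformist` is an assumed import path used only for illustration:

```python
# Hedged sketch of the upstream nonconformist pattern; the import path below
# is an assumption, since the subpackage name is not shown in this diff.
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from unifiedbooster.nonconformist import (
    AbsErrorErrFunc, IcpRegressor, RegressorAdapter, RegressorNc,
)

X, y = make_regression(n_samples=500, noise=10.0, random_state=0)
X_train, X_rest, y_train, y_rest = train_test_split(X, y, test_size=0.4, random_state=0)
X_calib, X_test, y_calib, y_test = train_test_split(X_rest, y_rest, test_size=0.5, random_state=0)

adapter = RegressorAdapter(GradientBoostingRegressor())
nc = RegressorNc(adapter, AbsErrorErrFunc())  # absolute-error nonconformity score
icp = IcpRegressor(nc)                        # inductive conformal regressor
icp.fit(X_train, y_train)                     # fit on the proper training set
icp.calibrate(X_calib, y_calib)               # calibrate on held-out data
intervals = icp.predict(X_test, significance=0.1)  # 90% prediction intervals
```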