unifiedbooster-0.6.0-py3-none-any.whl → unifiedbooster-0.7.0-py3-none-any.whl

unifiedbooster/gbdt.py CHANGED
@@ -35,7 +35,6 @@ class GBDT(BaseEstimator):
         **kwargs: dict
             additional parameters to be passed to the class
     """
-
     def __init__(
         self,
         model_type="xgboost",
@@ -44,6 +43,8 @@ class GBDT(BaseEstimator):
         max_depth=3,
         rowsample=1.0,
         colsample=1.0,
+        level=None,
+        pi_method=None,
         verbose=0,
         seed=123,
         **kwargs
@@ -55,6 +56,8 @@ class GBDT(BaseEstimator):
         self.max_depth = max_depth
         self.rowsample = rowsample
         self.colsample = colsample
+        self.level = level
+        self.pi_method = pi_method
         self.verbose = verbose
         self.seed = seed
 
@@ -91,7 +94,7 @@ class GBDT(BaseEstimator):
                 "verbose": self.verbose,
                 "random_seed": self.seed,
                 "boosting_type": "Plain",
-            "leaf_estimation_iterations": 1,
+                "leaf_estimation_iterations": 1,
                 "bootstrap_type": "Bernoulli",
                 **kwargs,
             }
@@ -126,7 +129,6 @@ class GBDT(BaseEstimator):
 
             self: object
         """
-
         if getattr(self, "type_fit") == "classification":
             self.classes_ = np.unique(y)  # for compatibility with sklearn
             self.n_classes_ = len(
@@ -152,5 +154,7 @@ class GBDT(BaseEstimator):
 
             model predictions: {array-like}
         """
-
-        return getattr(self, "model").predict(X)
+        if self.level is not None and self.type_fit == "regression":
+            return getattr(self, "model").predict(X, return_pi=True)
+        else:
+            return getattr(self, "model").predict(X)
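The practical effect of this change: when `level` is set on a regressor, `predict` forwards `return_pi=True` to the wrapped model, which returns a namedtuple of point predictions plus interval bounds instead of a bare array. A minimal sketch of that contract (the `DescribeResult` name matches the `PredictionInterval` class added later in this diff; the numbers are made up):

```python
# Sketch of what predict() returns for a regressor once level is set:
# a namedtuple (mean, lower, upper) rather than a bare prediction array.
from collections import namedtuple
import numpy as np

DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))

pred = np.array([2.1, 3.4, 1.8])   # stand-in point predictions
quantile = 0.5                     # stand-in calibrated half-width
res = DescribeResult(pred, pred - quantile, pred + quantile)

assert np.all(res.lower <= res.mean) and np.all(res.mean <= res.upper)
```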
unifiedbooster/gbdt_classification.py CHANGED
@@ -1,5 +1,6 @@
 from .gbdt import GBDT
 from sklearn.base import ClassifierMixin
+from .predictionset import PredictionSet
 
 try:
     from xgboost import XGBClassifier
@@ -39,6 +40,12 @@ class GBDTClassifier(GBDT, ClassifierMixin):
 
         colsample: float
             percentage of features to use at each node split
+
+        level: float
+            confidence level for prediction sets
+
+        pi_method: str
+            method for constructing the prediction sets: 'icp' (inductive conformal), 'tcp' (transductive conformal)
 
         verbose: int
             controls verbosity (default=0)
@@ -88,7 +95,6 @@ class GBDTClassifier(GBDT, ClassifierMixin):
         print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
     ```
     """
-
     def __init__(
         self,
         model_type="xgboost",
@@ -97,6 +103,8 @@ class GBDTClassifier(GBDT, ClassifierMixin):
         max_depth=3,
         rowsample=1.0,
         colsample=1.0,
+        level=None,
+        pi_method="icp",
         verbose=0,
         seed=123,
         **kwargs,
@@ -111,21 +119,46 @@ class GBDTClassifier(GBDT, ClassifierMixin):
             max_depth=max_depth,
             rowsample=rowsample,
             colsample=colsample,
+            level=level,
+            pi_method=pi_method,
             verbose=verbose,
             seed=seed,
             **kwargs,
         )
 
-        if model_type == "xgboost":
-            self.model = XGBClassifier(**self.params)
-        elif model_type == "catboost":
-            self.model = CatBoostClassifier(**self.params)
-        elif model_type == "lightgbm":
-            self.model = LGBMClassifier(**self.params)
-        elif model_type == "gradientboosting":
-            self.model = GradientBoostingClassifier(**self.params)
+        if self.level is not None:
+
+            if model_type == "xgboost":
+                self.model = PredictionSet(XGBClassifier(**self.params),
+                                           level=self.level,
+                                           method=self.pi_method)
+            elif model_type == "catboost":
+                self.model = PredictionSet(CatBoostClassifier(**self.params),
+                                           level=self.level,
+                                           method=self.pi_method)
+            elif model_type == "lightgbm":
+                self.model = PredictionSet(LGBMClassifier(**self.params),
+                                           level=self.level,
+                                           method=self.pi_method)
+            elif model_type == "gradientboosting":
+                self.model = PredictionSet(GradientBoostingClassifier(**self.params),
+                                           level=self.level,
+                                           method=self.pi_method)
+            else:
+                raise ValueError(f"Unknown model_type: {model_type}")
+
         else:
-            raise ValueError(f"Unknown model_type: {model_type}")
+
+            if model_type == "xgboost":
+                self.model = XGBClassifier(**self.params)
+            elif model_type == "catboost":
+                self.model = CatBoostClassifier(**self.params)
+            elif model_type == "lightgbm":
+                self.model = LGBMClassifier(**self.params)
+            elif model_type == "gradientboosting":
+                self.model = GradientBoostingClassifier(**self.params)
+            else:
+                raise ValueError(f"Unknown model_type: {model_type}")
 
     def predict_proba(self, X):
         """Predict probabilities for test data X.
@@ -143,5 +176,4 @@ class GBDTClassifier(GBDT, ClassifierMixin):
 
             probability estimates for test data: {array-like}
         """
-
         return self.model.predict_proba(X)
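Taken together, the classifier changes mean that passing `level` swaps the bare booster for a conformal wrapper in `self.model`. A hedged usage sketch (assuming `GBDTClassifier` is importable from the package root, as in the docstring examples above, and that xgboost is installed):

```python
# Sketch: GBDTClassifier with a conformal prediction-set wrapper.
# With level set, predict() goes through PredictionSet, whose "icp" method
# splits the training data 50/50 into fit and calibration halves.
import unifiedbooster as ub
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

clf = ub.GBDTClassifier(model_type="xgboost", level=95, pi_method="icp")
clf.fit(X_train, y_train)
prediction_sets = clf.predict(X_test)  # conformal sets at 95% confidence
```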
unifiedbooster/gbdt_regression.py CHANGED
@@ -1,5 +1,6 @@
 from .gbdt import GBDT
 from sklearn.base import RegressorMixin
+from .predictioninterval import PredictionInterval
 
 try:
     from xgboost import XGBRegressor
@@ -39,6 +40,12 @@ class GBDTRegressor(GBDT, RegressorMixin):
 
         colsample: float
             percentage of features to use at each node split
+
+        level: float
+            confidence level for prediction intervals
+
+        pi_method: str
+            method for constructing the prediction intervals: 'splitconformal', 'localconformal'
 
         verbose: int
             controls verbosity (default=0)
@@ -88,7 +95,6 @@ class GBDTRegressor(GBDT, RegressorMixin):
         print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}")
     ```
     """
-
     def __init__(
         self,
         model_type="xgboost",
@@ -97,12 +103,14 @@ class GBDTRegressor(GBDT, RegressorMixin):
         max_depth=3,
         rowsample=1.0,
         colsample=1.0,
+        level=None,
+        pi_method="splitconformal",
         verbose=0,
         seed=123,
         **kwargs,
     ):
 
-        self.type_fit = "regression"
+        self.type_fit = "regression"
 
         super().__init__(
             model_type=model_type,
@@ -111,18 +119,43 @@ class GBDTRegressor(GBDT, RegressorMixin):
             max_depth=max_depth,
             rowsample=rowsample,
             colsample=colsample,
+            level=level,
+            pi_method=pi_method,
             verbose=verbose,
             seed=seed,
             **kwargs,
         )
 
-        if model_type == "xgboost":
-            self.model = XGBRegressor(**self.params)
-        elif model_type == "catboost":
-            self.model = CatBoostRegressor(**self.params)
-        elif model_type == "lightgbm":
-            self.model = LGBMRegressor(**self.params)
-        elif model_type == "gradientboosting":
-            self.model = GradientBoostingRegressor(**self.params)
-        else:
-            raise ValueError(f"Unknown model_type: {model_type}")
+        if self.level is not None:
+
+            if model_type == "xgboost":
+                self.model = PredictionInterval(XGBRegressor(**self.params),
+                                                level=self.level,
+                                                method=self.pi_method)
+            elif model_type == "catboost":
+                self.model = PredictionInterval(CatBoostRegressor(**self.params),
+                                                level=self.level,
+                                                method=self.pi_method)
+            elif model_type == "lightgbm":
+                self.model = PredictionInterval(LGBMRegressor(**self.params),
+                                                level=self.level,
+                                                method=self.pi_method)
+            elif model_type == "gradientboosting":
+                self.model = PredictionInterval(GradientBoostingRegressor(**self.params),
+                                                level=self.level,
+                                                method=self.pi_method)
+            else:
+                raise ValueError(f"Unknown model_type: {model_type}")
+
+        else:
+
+            if model_type == "xgboost":
+                self.model = XGBRegressor(**self.params)
+            elif model_type == "catboost":
+                self.model = CatBoostRegressor(**self.params)
+            elif model_type == "lightgbm":
+                self.model = LGBMRegressor(**self.params)
+            elif model_type == "gradientboosting":
+                self.model = GradientBoostingRegressor(**self.params)
+            else:
+                raise ValueError(f"Unknown model_type: {model_type}")
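The regressor mirrors the classifier: with `level` set, the booster is wrapped in `PredictionInterval` and `GBDT.predict` returns intervals via `return_pi=True`. A usage sketch under the same assumptions as the classifier example:

```python
# Sketch: GBDTRegressor returning split-conformal prediction intervals.
import unifiedbooster as ub
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

regr = ub.GBDTRegressor(
    model_type="lightgbm", level=95, pi_method="splitconformal"
)
regr.fit(X_train, y_train)
preds = regr.predict(X_test)   # DescribeResult(mean=..., lower=..., upper=...)
print(preds.lower[:3], preds.upper[:3])
```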
unifiedbooster/gpoptimization.py CHANGED
@@ -168,7 +168,8 @@ def cross_val_optim(
     ).mean()
 
     # objective function for hyperparams tuning
-    if n_estimators is not None:
+    if n_estimators is not None:
+
         def crossval_objective(xx):
             return gbdt_cv(
                 X_train=X_train,
@@ -185,25 +186,27 @@ def cross_val_optim(
                 scoring=scoring,
                 seed=seed,
             )
-    else:  # n_estimators is None
+
+    else:  # n_estimators is None
+
         def crossval_objective(xx):
             return gbdt_cv(
-            X_train=X_train,
-            y_train=y_train,
-            model_type=model_type,
-            n_estimators=int(10 ** xx[4]),
-            learning_rate=10 ** xx[0],
-            max_depth=int(xx[1]),
-            rowsample=xx[2],
-            colsample=xx[3],
-            cv=cv,
-            n_jobs=n_jobs,
-            type_fit=type_fit,
-            scoring=scoring,
-            seed=seed,
-            )
+                X_train=X_train,
+                y_train=y_train,
+                model_type=model_type,
+                n_estimators=int(10 ** xx[4]),
+                learning_rate=10 ** xx[0],
+                max_depth=int(xx[1]),
+                rowsample=xx[2],
+                colsample=xx[3],
+                cv=cv,
+                n_jobs=n_jobs,
+                type_fit=type_fit,
+                scoring=scoring,
+                seed=seed,
+            )
 
-    if n_estimators is not None:
+    if n_estimators is not None:
         if surrogate_obj is None:
             gp_opt = gp.GPOpt(
                 objective_func=crossval_objective,
@@ -240,7 +243,7 @@ def cross_val_optim(
                 n_iter=n_iter,
                 seed=seed,
             )
-    else:  # n_estimators is None
+    else:  # n_estimators is None
         if surrogate_obj is None:
             gp_opt = gp.GPOpt(
                 objective_func=crossval_objective,
@@ -251,7 +254,7 @@ def cross_val_optim(
                     "max_depth",
                     "rowsample",
                     "colsample",
-                    "n_estimators"
+                    "n_estimators",
                 ],
                 method="bayesian",
                 n_init=n_init,
@@ -268,7 +271,7 @@ def cross_val_optim(
                     "max_depth",
                     "rowsample",
                     "colsample",
-                    "n_estimators"
+                    "n_estimators",
                 ],
                 acquisition="ucb",
                 method="splitconformal",
@@ -282,7 +285,11 @@ def cross_val_optim(
 
     res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
     res.best_params["model_type"] = model_type
-    res.best_params["n_estimators"] = int(n_estimators) if n_estimators is not None else int(10 ** res.best_params["n_estimators"])
+    res.best_params["n_estimators"] = (
+        int(n_estimators)
+        if n_estimators is not None
+        else int(10 ** res.best_params["n_estimators"])
+    )
     res.best_params["learning_rate"] = 10 ** res.best_params["learning_rate"]
     res.best_params["max_depth"] = int(res.best_params["max_depth"])
     res.best_params["rowsample"] = res.best_params["rowsample"]
@@ -355,7 +362,7 @@ def lazy_cross_val_optim(
 
         customize: boolean
             if True, the surrogate is transformed into a quasi-randomized network (default is False)
-
+
         n_estimators: int
             maximum number of trees that can be built (default is None; if None, the parameter is tuned)
 
@@ -383,7 +390,7 @@ def lazy_cross_val_optim(
     Examples:
 
         ```python
-        import os
+        import os
         import unifiedbooster as ub
         from sklearn.datasets import load_breast_cancer
         from sklearn.model_selection import train_test_split
@@ -454,7 +461,7 @@ def lazy_cross_val_optim(
             if customize == True:
                 print(f"\n surrogate: CustomRegressor({est[0]})")
                 surr_obj = ns.CustomRegressor(obj=est[1]())
-            else:
+            else:
                 print(f"\n surrogate: {est[0]}")
                 surr_obj = est[1]()
             res = cross_val_optim(
@@ -479,7 +486,7 @@ def lazy_cross_val_optim(
             if customize == True:
                 results.append((f"CustomRegressor({est[0]})", res))
             else:
-                results.append((est[0], res))
+                results.append((est[0], res))
         except:
             pass
 
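Note on the decoding step reformatted above: `n_estimators` (like `learning_rate`) is searched on a log10 scale, so the optimizer's raw coordinate must be exponentiated before use. A small worked example of that transformation (the particular raw values are illustrative, not read from the diff):

```python
# Worked example of the log10 decoding done on cross_val_optim's best_params.
xx4 = 2.5                       # raw optimizer coordinate for n_estimators
n_estimators = int(10 ** xx4)   # -> 316 trees
xx0 = -1.2                      # raw coordinate for learning_rate
learning_rate = 10 ** xx0       # -> ~0.063
print(n_estimators, learning_rate)
```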
unifiedbooster/nonconformist/__init__.py CHANGED
@@ -18,13 +18,19 @@ from .nc import (
 )
 from .cp import IcpRegressor, TcpClassifier
 from .icp import IcpClassifier
-from .base import RegressorAdapter
+from .nc import ClassifierNc, MarginErrFunc
+from .base import RegressorAdapter, ClassifierAdapter
 
 __all__ = [
     "AbsErrorErrFunc",
+    "MarginErrFunc",
     "QuantileRegErrFunc",
     "RegressorAdapter",
+    "ClassifierAdapter",
     "RegressorNc",
+    "ClassifierNc",
     "RegressorNormalizer",
     "IcpRegressor",
+    "IcpClassifier",
+    "TcpClassifier"
 ]
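The widened `__all__` makes the classifier-side conformal building blocks public. A minimal sketch of how they compose (this mirrors the `PredictionSet` internals added later in this diff, with an arbitrary scikit-learn base classifier):

```python
# Sketch: inductive conformal classification from the newly exported pieces.
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from unifiedbooster.nonconformist import (
    ClassifierAdapter, ClassifierNc, IcpClassifier, MarginErrFunc,
)

X, y = load_iris(return_X_y=True)
X_fit, X_cal, y_fit, y_cal = train_test_split(X, y, test_size=0.5, random_state=0)

icp = IcpClassifier(
    ClassifierNc(ClassifierAdapter(RandomForestClassifier()), MarginErrFunc())
)
icp.fit(X_fit, y_fit)        # fit the underlying classifier
icp.calibrate(X_cal, y_cal)  # compute nonconformity scores on held-out data
sets = icp.predict(X_cal, significance=0.05)  # prediction sets at ~95%
```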
unifiedbooster/nonconformist/base.py CHANGED
@@ -9,7 +9,7 @@ docstring
 import abc
 import numpy as np
 
-from sklearn.base import BaseEstimator
+from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin
 
 
 class RegressorMixin(object):
@@ -102,15 +102,15 @@ class BaseModelAdapter(BaseEstimator):
         pass
 
 
-class ClassifierAdapter(BaseModelAdapter):
+class ClassifierAdapter(BaseModelAdapter, ClassifierMixin):
     def __init__(self, model, fit_params=None):
         super(ClassifierAdapter, self).__init__(model, fit_params)
 
     def _underlying_predict(self, x):
         return self.model.predict_proba(x)
+
 
-
-class RegressorAdapter(BaseModelAdapter):
+class RegressorAdapter(BaseModelAdapter, RegressorMixin):
     def __init__(self, model, fit_params=None):
         super(RegressorAdapter, self).__init__(model, fit_params)
 
unifiedbooster/predictioninterval/__init__.py ADDED
@@ -0,0 +1,3 @@
+from .predictioninterval import PredictionInterval
+
+__all__ = ["PredictionInterval"]
unifiedbooster/predictioninterval/predictioninterval.py ADDED
@@ -0,0 +1,314 @@
+from locale import normalize
+import numpy as np
+import pickle
+from collections import namedtuple
+from sklearn.base import BaseEstimator, RegressorMixin
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import ExtraTreesRegressor
+from sklearn.preprocessing import StandardScaler
+from sklearn.neighbors import KernelDensity
+from sklearn.model_selection import GridSearchCV
+from scipy.stats import gaussian_kde
+from tqdm import tqdm
+from ..nonconformist import IcpRegressor
+from ..nonconformist import RegressorNc
+from ..nonconformist import RegressorNormalizer, AbsErrorErrFunc
+
+
+class PredictionInterval(BaseEstimator, RegressorMixin):
+    """Class PredictionInterval: Obtain prediction intervals.
+
+    Attributes:
+
+        obj: an object;
+            fitted object containing methods `fit` and `predict`
+
+        method: a string;
+            method for constructing the prediction intervals.
+            Currently "splitconformal" (default) and "localconformal"
+
+        level: a float;
+            Confidence level for prediction intervals. Default is 95,
+            equivalent to a miscoverage error of 5 (%)
+
+        replications: an integer;
+            Number of replications for simulated conformal (default is `None`)
+
+        type_pi: a string;
+            type of prediction interval: currently "kde" (default) or "bootstrap"
+
+        type_split: a string;
+            "random" (random split of data) or "sequential" (sequential split of data)
+
+        seed: an integer;
+            Reproducibility of fit (there's a random split between fitting and calibration data)
+    """
+
+    def __init__(
+        self,
+        obj,
+        method="splitconformal",
+        level=95,
+        type_pi="bootstrap",
+        type_split="random",
+        replications=None,
+        kernel=None,
+        agg="mean",
+        seed=123,
+    ):
+
+        self.obj = obj
+        self.method = method
+        self.level = level
+        self.type_pi = type_pi
+        self.type_split = type_split
+        self.replications = replications
+        self.kernel = kernel
+        self.agg = agg
+        self.seed = seed
+        self.alpha_ = 1 - self.level / 100
+        self.quantile_ = None
+        self.icp_ = None
+        self.calibrated_residuals_ = None
+        self.scaled_calibrated_residuals_ = None
+        self.calibrated_residuals_scaler_ = None
+        self.kde_ = None
+
+    def fit(self, X, y):
+        """Fit the `method` to training data (X, y).
+
+        Args:
+
+            X: array-like, shape = [n_samples, n_features];
+                Training set vectors, where n_samples is the number
+                of samples and n_features is the number of features.
+
+            y: array-like, shape = [n_samples, ]; Target values.
+
+        """
+
+        if self.type_split == "random":
+            X_train, X_calibration, y_train, y_calibration = train_test_split(
+                X, y, test_size=0.5, random_state=self.seed
+            )
+        elif self.type_split == "sequential":
+            n_x = X.shape[0]
+            n_x_half = n_x // 2
+            first_half_idx = range(0, n_x_half)
+            second_half_idx = range(n_x_half, n_x)
+            X_train = X[first_half_idx, :]
+            X_calibration = X[second_half_idx, :]
+            y_train = y[first_half_idx]
+            y_calibration = y[second_half_idx]
+
+        if self.method == "splitconformal":
+
+            n_samples_calibration = X_calibration.shape[0]
+            self.obj.fit(X_train, y_train)
+            preds_calibration = self.obj.predict(X_calibration)
+            self.calibrated_residuals_ = y_calibration - preds_calibration
+            absolute_residuals = np.abs(self.calibrated_residuals_)
+            self.calibrated_residuals_scaler_ = StandardScaler(
+                with_mean=True, with_std=True
+            )
+            self.scaled_calibrated_residuals_ = (
+                self.calibrated_residuals_scaler_.fit_transform(
+                    self.calibrated_residuals_.reshape(-1, 1)
+                ).ravel()
+            )
+            try:
+                # numpy version >= 1.22
+                self.quantile_ = np.quantile(
+                    a=absolute_residuals, q=self.level / 100, method="higher"
+                )
+            except:
+                # numpy version < 1.22
+                self.quantile_ = np.quantile(
+                    a=absolute_residuals,
+                    q=self.level / 100,
+                    interpolation="higher",
+                )
+
+        if self.method == "localconformal":
+
+            mad_estimator = ExtraTreesRegressor()
+            normalizer = RegressorNormalizer(
+                self.obj, mad_estimator, AbsErrorErrFunc()
+            )
+            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
+            self.icp_ = IcpRegressor(nc)
+            self.icp_.fit(X_train, y_train)
+            self.icp_.calibrate(X_calibration, y_calibration)
+
+        return self
+
+    def predict(self, X, return_pi=False):
+        """Obtain predictions and prediction intervals
+
+        Args:
+
+            X: array-like, shape = [n_samples, n_features];
+                Testing set vectors, where n_samples is the number
+                of samples and n_features is the number of features.
+
+            return_pi: boolean
+                Whether the prediction interval is returned or not.
+                Default is False, for compatibility with other _estimators_.
+                If True, a tuple containing the predictions + lower and upper
+                bounds is returned.
+
+        """
+
+        pred = self.obj.predict(X)
+
+        if self.method == "splitconformal":
+
+            if self.replications is None:
+
+                if return_pi:
+
+                    DescribeResult = namedtuple(
+                        "DescribeResult", ("mean", "lower", "upper")
+                    )
+
+                    return DescribeResult(
+                        pred, pred - self.quantile_, pred + self.quantile_
+                    )
+
+                else:
+
+                    return pred
+
+            else:  # if self.replications is not None
+
+                assert self.type_pi in (
+                    "bootstrap",
+                    "kde",
+                ), "`self.type_pi` must be in ('bootstrap', 'kde')"
+
+                if self.type_pi == "bootstrap":
+                    np.random.seed(self.seed)
+                    self.residuals_sims_ = np.asarray(
+                        [
+                            np.random.choice(
+                                a=self.scaled_calibrated_residuals_,
+                                size=X.shape[0],
+                            )
+                            for _ in range(self.replications)
+                        ]
+                    ).T
+                    self.sims_ = np.asarray(
+                        [
+                            pred
+                            + self.calibrated_residuals_scaler_.scale_[0]
+                            * self.residuals_sims_[:, i].ravel()
+                            for i in range(self.replications)
+                        ]
+                    ).T
+                elif self.type_pi == "kde":
+                    self.kde_ = gaussian_kde(
+                        dataset=self.scaled_calibrated_residuals_
+                    )
+                    self.sims_ = np.asarray(
+                        [
+                            pred
+                            + self.calibrated_residuals_scaler_.scale_[0]
+                            * self.kde_.resample(
+                                size=X.shape[0], seed=self.seed + i
+                            ).ravel()
+                            for i in range(self.replications)
+                        ]
+                    ).T
+
+                self.mean_ = np.mean(self.sims_, axis=1)
+                self.lower_ = np.quantile(
+                    self.sims_, q=self.alpha_ / 200, axis=1
+                )
+                self.upper_ = np.quantile(
+                    self.sims_, q=1 - self.alpha_ / 200, axis=1
+                )
+
+                DescribeResult = namedtuple(
+                    "DescribeResult", ("mean", "sims", "lower", "upper")
+                )
+
+                return DescribeResult(
+                    self.mean_, self.sims_, self.lower_, self.upper_
+                )
+
+        if self.method == "localconformal":
+
+            if self.replications is None:
+
+                if return_pi:
+
+                    predictions_bounds = self.icp_.predict(
+                        X, significance=1 - self.level
+                    )
+                    DescribeResult = namedtuple(
+                        "DescribeResult", ("mean", "lower", "upper")
+                    )
+                    return DescribeResult(
+                        pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
+                    )
+
+                else:
+
+                    return pred
+
+            else:  # if self.replications is not None
+
+                assert self.type_pi in (
+                    "bootstrap",
+                    "kde",
+                ), "`self.type_pi` must be in ('bootstrap', 'kde')"
+
+                if self.type_pi == "bootstrap":
+                    np.random.seed(self.seed)
+                    self.residuals_sims_ = np.asarray(
+                        [
+                            np.random.choice(
+                                a=self.scaled_calibrated_residuals_,
+                                size=X.shape[0],
+                            )
+                            for _ in range(self.replications)
+                        ]
+                    ).T
+                    self.sims_ = np.asarray(
+                        [
+                            pred
+                            + self.calibrated_residuals_scaler_.scale_[0]
+                            * self.residuals_sims_[:, i].ravel()
+                            for i in tqdm(range(self.replications))
+                        ]
+                    ).T
+                elif self.type_pi == "kde":
+                    self.kde_ = gaussian_kde(
+                        dataset=self.scaled_calibrated_residuals_
+                    )
+                    self.sims_ = np.asarray(
+                        [
+                            pred
+                            + self.calibrated_residuals_scaler_.scale_[0]
+                            * self.kde_.resample(
+                                size=X.shape[0], seed=self.seed + i
+                            ).ravel()
+                            for i in tqdm(range(self.replications))
+                        ]
+                    ).T
+
+                self.mean_ = np.mean(self.sims_, axis=1)
+                self.lower_ = np.quantile(
+                    self.sims_, q=self.alpha_ / 200, axis=1
+                )
+                self.upper_ = np.quantile(
+                    self.sims_, q=1 - self.alpha_ / 200, axis=1
+                )
+
+                DescribeResult = namedtuple(
+                    "DescribeResult", ("mean", "sims", "lower", "upper")
+                )
+
+                return DescribeResult(
+                    self.mean_, self.sims_, self.lower_, self.upper_
+                )
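For readers skimming the new module: with the default `"splitconformal"` method, `fit` holds out half the data for calibration, takes the `level/100` empirical quantile (with the `"higher"` rule) of the absolute calibration residuals, and `predict` returns `pred ± quantile_`. A numeric sketch of just that quantile step, outside the class:

```python
# Numeric sketch of the split-conformal half-width computed in fit().
import numpy as np

rng = np.random.default_rng(123)
calibration_residuals = rng.normal(size=1000)   # stand-in residuals

# level=95 -> the 0.95 empirical quantile of |residuals|, "higher" rule
half_width = np.quantile(np.abs(calibration_residuals), q=0.95, method="higher")

pred = np.array([10.0, 12.0])                   # stand-in point predictions
lower, upper = pred - half_width, pred + half_width
```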
unifiedbooster/predictionset/__init__.py ADDED
@@ -0,0 +1,3 @@
+from .predictionset import PredictionSet
+
+__all__ = ["PredictionSet"]
unifiedbooster/predictionset/predictionset.py ADDED
@@ -0,0 +1,111 @@
+from locale import normalize
+import numpy as np
+import pickle
+from collections import namedtuple
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import ExtraTreesRegressor
+from sklearn.preprocessing import StandardScaler
+from scipy.stats import gaussian_kde
+from tqdm import tqdm
+from ..nonconformist import ClassifierAdapter, IcpClassifier, TcpClassifier, ClassifierNc, MarginErrFunc
+
+
+class PredictionSet(BaseEstimator, ClassifierMixin):
+    """Class PredictionSet: Obtain prediction sets.
+
+    Attributes:
+
+        obj: an object;
+            fitted object containing methods `fit` and `predict`
+
+        method: a string;
+            method for constructing the prediction sets.
+            Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)
+
+        level: a float;
+            Confidence level for prediction sets. Default is None,
+            95 is equivalent to a miscoverage error of 5 (%)
+
+        seed: an integer;
+            Reproducibility of fit (there's a random split between fitting and calibration data)
+    """
+
+    def __init__(
+        self,
+        obj,
+        method="icp",
+        level=None,
+        seed=123,
+    ):
+
+        self.obj = obj
+        self.method = method
+        self.level = level
+        self.seed = seed
+        if self.level is not None:
+            self.alpha_ = 1 - self.level / 100
+        self.quantile_ = None
+        self.icp_ = None
+        self.tcp_ = None
+
+        if self.method == "icp":
+            self.icp_ = IcpClassifier(
+                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
+            )
+        elif self.method == "tcp":
+            self.tcp_ = TcpClassifier(
+                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
+            )
+        else:
+            raise ValueError(
+                "`self.method` must be in ('icp', 'tcp')"
+            )
+
+
+    def fit(self, X, y):
+        """Fit the `method` to training data (X, y).
+
+        Args:
+
+            X: array-like, shape = [n_samples, n_features];
+                Training set vectors, where n_samples is the number
+                of samples and n_features is the number of features.
+
+            y: array-like, shape = [n_samples, ]; Target values.
+
+        """
+        if self.method == "icp":
+
+            X_train, X_calibration, y_train, y_calibration = train_test_split(
+                X, y, test_size=0.5, random_state=self.seed)
+            self.icp_.fit(X_train, y_train)
+            self.icp_.calibrate(X_calibration, y_calibration)
+
+        elif self.method == "tcp":
+
+            self.tcp_.fit(X, y)
+
+        return self
+
+    def predict(self, X):
+        """Obtain predictions and prediction sets
+
+        Args:
+
+            X: array-like, shape = [n_samples, n_features];
+                Testing set vectors, where n_samples is the number
+                of samples and n_features is the number of features.
+
+        """
+
+        if self.method == "icp":
+            return self.icp_.predict(X, significance=self.alpha_)
+
+        elif self.method == "tcp":
+            return self.tcp_.predict(X, significance=self.alpha_)
+
+        else:
+            raise ValueError(
+                "`self.method` must be in ('icp', 'tcp')"
+            )
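Used standalone, the wrapper looks like this (a sketch following the `fit`/`predict` paths above; `GradientBoostingClassifier` is an arbitrary choice of base classifier with `fit`/`predict_proba`):

```python
# Sketch: PredictionSet wrapping a scikit-learn classifier directly,
# the same pattern GBDTClassifier uses when level is not None.
from sklearn.datasets import load_wine
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from unifiedbooster.predictionset import PredictionSet

X, y = load_wine(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

ps = PredictionSet(GradientBoostingClassifier(), method="icp", level=95)
ps.fit(X_train, y_train)      # icp: internal 50/50 fit/calibration split
sets = ps.predict(X_test)     # significance = 1 - level/100 = 0.05
```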
unifiedbooster-{0.6.0 → 0.7.0}.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unifiedbooster
-Version: 0.6.0
+Version: 0.7.0
 Summary: Unified interface for Gradient Boosted Decision Trees
 Home-page: https://github.com/thierrymoudiki/unifiedbooster
 Author: T. Moudiki
unifiedbooster-0.7.0.dist-info/RECORD ADDED
@@ -0,0 +1,23 @@
+unifiedbooster/__init__.py,sha256=8FEkWCZ2tT8xcW46Z0X_BS9_r0kQWVAu37IncLq6QWU,301
+unifiedbooster/gbdt.py,sha256=oAG-dQRY3FG9Tdhdb0iZuupMOAj1_KcGQbp47AHc72o,5175
+unifiedbooster/gbdt_classification.py,sha256=c9MYlPeTjQ4pAy0CZtroid9UfhQAlQVHekCWbbTIMBQ,5798
+unifiedbooster/gbdt_regression.py,sha256=tHi8XJ1jS2LuXdQoRDsTkFK3qt3L-4kQ9IRsMNW37gI,5351
+unifiedbooster/gpoptimization.py,sha256=UoT20E5dfhREiY7Cqo0vCktBzDBRnnG_6Xyg426vdfk,15238
+unifiedbooster/nonconformist/__init__.py,sha256=rETO9FfHb_yWs4ttLa2FJb2NAy-KFnyESeBTltDwJQA,739
+unifiedbooster/nonconformist/acp.py,sha256=SrfBVCWjXvntkBJ7GXTFYE6i6NU3Pv-5ibwhpItDKDw,11553
+unifiedbooster/nonconformist/base.py,sha256=Ycyt6pwxo0QjD3qBAfDqjzFvFfknIMkX0_yIc6EtPFo,4028
+unifiedbooster/nonconformist/cp.py,sha256=YKiBFKwvaJbWnJcgi-saiVD_2ci-LBDHgytf70jHvFg,6174
+unifiedbooster/nonconformist/evaluation.py,sha256=b24buhhW3v3CKRSi69WKCq9Sb38Unmjr8TAZr66Cdns,15906
+unifiedbooster/nonconformist/icp.py,sha256=wqOaoy22KiF2ebVQOjp8MR-zvEjT0hE0NiMfeNZOQEw,15982
+unifiedbooster/nonconformist/nc.py,sha256=_ED8Yn068Ivio9Xr0SjwKh4Ts5MfUACZFY40ObxPJ60,19644
+unifiedbooster/nonconformist/util.py,sha256=UBKlAEb0mj9MVWBOKCRAq_OQP5Z53FMqWlTyo7RIg5Q,242
+unifiedbooster/predictioninterval/__init__.py,sha256=I1X1omp6Bsuzfm7z8TCSICe2175rHrdoXWEDOicOP8U,85
+unifiedbooster/predictioninterval/predictioninterval.py,sha256=6XQnJQDpsWG-uu5yFxeZQewnrErAjZLzv21YvtarXZQ,11164
+unifiedbooster/predictionset/__init__.py,sha256=IGhWVX8-VeZ15HeLFWu8QeKCz7DIE4TlEXMjTnB3VdE,70
+unifiedbooster/predictionset/predictionset.py,sha256=k9s2PYK2KvOfDoGfSGXUHwwNA9kL2VYiT2JPokwZ8YA,3415
+unifiedbooster-0.7.0.dist-info/LICENSE,sha256=3rWw63btcdqbC0XMnpzCQhxDP8Vx7yKkKS7EDgJiY_4,1061
+unifiedbooster-0.7.0.dist-info/METADATA,sha256=7vR-c8aCOeF-96Uv9uBTugKmA-QC71b_5NyejATpnDM,955
+unifiedbooster-0.7.0.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
+unifiedbooster-0.7.0.dist-info/entry_points.txt,sha256=OVNTsCzMYnaJ11WIByB7G8Lym_dj-ERKZyQxWFUcW30,59
+unifiedbooster-0.7.0.dist-info/top_level.txt,sha256=gOMxxpRtx8_nJXTWsXJDFkNeCsjSJQPs6aUXKK5_nI4,15
+unifiedbooster-0.7.0.dist-info/RECORD,,
unifiedbooster-{0.6.0 → 0.7.0}.dist-info/WHEEL RENAMED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (72.1.0)
+Generator: setuptools (74.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
unifiedbooster-0.6.0.dist-info/RECORD DELETED
@@ -1,19 +0,0 @@
-unifiedbooster/__init__.py,sha256=8FEkWCZ2tT8xcW46Z0X_BS9_r0kQWVAu37IncLq6QWU,301
-unifiedbooster/gbdt.py,sha256=u5Sjw-V8BlDS4LUo_SNOfuz66EFcJhP1Al6Es41R_X8,4932
-unifiedbooster/gbdt_classification.py,sha256=wifw86cUvsyiKSz8MTxIgH6j7Gd1voIxXUiJVsE68bk,4219
-unifiedbooster/gbdt_regression.py,sha256=YQIDtW4hV7DxHAHuoMMkD1aRy0dzVXxx2rwPu3InTA8,3710
-unifiedbooster/gpoptimization.py,sha256=GvOcJLNtI5PD7umbnuXEM_zmL9zpzXhWE4zBlPiksiY,15292
-unifiedbooster/nonconformist/__init__.py,sha256=GFRHsqNvx7JrBhph5p9iMtVgcjOjyMYFyt6rwCbAbpg,568
-unifiedbooster/nonconformist/acp.py,sha256=SrfBVCWjXvntkBJ7GXTFYE6i6NU3Pv-5ibwhpItDKDw,11553
-unifiedbooster/nonconformist/base.py,sha256=0Iiuz_34KXZasbTkbXwXG2-1HiDbG7LpCQypTHuGtcs,3958
-unifiedbooster/nonconformist/cp.py,sha256=YKiBFKwvaJbWnJcgi-saiVD_2ci-LBDHgytf70jHvFg,6174
-unifiedbooster/nonconformist/evaluation.py,sha256=b24buhhW3v3CKRSi69WKCq9Sb38Unmjr8TAZr66Cdns,15906
-unifiedbooster/nonconformist/icp.py,sha256=wqOaoy22KiF2ebVQOjp8MR-zvEjT0hE0NiMfeNZOQEw,15982
-unifiedbooster/nonconformist/nc.py,sha256=_ED8Yn068Ivio9Xr0SjwKh4Ts5MfUACZFY40ObxPJ60,19644
-unifiedbooster/nonconformist/util.py,sha256=UBKlAEb0mj9MVWBOKCRAq_OQP5Z53FMqWlTyo7RIg5Q,242
-unifiedbooster-0.6.0.dist-info/LICENSE,sha256=3rWw63btcdqbC0XMnpzCQhxDP8Vx7yKkKS7EDgJiY_4,1061
-unifiedbooster-0.6.0.dist-info/METADATA,sha256=plXwSokDSPdwYPHo8WXgP1F8Tk_d3wjXMwiFwYCwi1I,955
-unifiedbooster-0.6.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-unifiedbooster-0.6.0.dist-info/entry_points.txt,sha256=OVNTsCzMYnaJ11WIByB7G8Lym_dj-ERKZyQxWFUcW30,59
-unifiedbooster-0.6.0.dist-info/top_level.txt,sha256=gOMxxpRtx8_nJXTWsXJDFkNeCsjSJQPs6aUXKK5_nI4,15
-unifiedbooster-0.6.0.dist-info/RECORD,,