unifiedbooster 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unifiedbooster/gpoptimization.py +115 -57
- unifiedbooster/nonconformist/__init__.py +30 -0
- unifiedbooster/nonconformist/acp.py +381 -0
- unifiedbooster/nonconformist/base.py +156 -0
- unifiedbooster/nonconformist/cp.py +172 -0
- unifiedbooster/nonconformist/evaluation.py +486 -0
- unifiedbooster/nonconformist/icp.py +442 -0
- unifiedbooster/nonconformist/nc.py +610 -0
- unifiedbooster/nonconformist/util.py +9 -0
- {unifiedbooster-0.5.0.dist-info → unifiedbooster-0.6.0.dist-info}/METADATA +1 -1
- unifiedbooster-0.6.0.dist-info/RECORD +19 -0
- unifiedbooster-0.5.0.dist-info/RECORD +0 -11
- {unifiedbooster-0.5.0.dist-info → unifiedbooster-0.6.0.dist-info}/LICENSE +0 -0
- {unifiedbooster-0.5.0.dist-info → unifiedbooster-0.6.0.dist-info}/WHEEL +0 -0
- {unifiedbooster-0.5.0.dist-info → unifiedbooster-0.6.0.dist-info}/entry_points.txt +0 -0
- {unifiedbooster-0.5.0.dist-info → unifiedbooster-0.6.0.dist-info}/top_level.txt +0 -0
unifiedbooster/gpoptimization.py
CHANGED
@@ -18,7 +18,7 @@ def cross_val_optim(
     model_type="xgboost",
     type_fit="classification",
     scoring="accuracy",
-    n_estimators=
+    n_estimators=None,
     surrogate_obj=None,
     cv=5,
     n_jobs=None,
@@ -59,7 +59,7 @@ def cross_val_optim(
         scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules

     n_estimators: int
-        maximum number of trees that can be built
+        maximum number of trees that can be built (default is None, and if None, then the parameter is tuned)

     surrogate_obj: an object;
         An ML model for estimating the uncertainty around the objective function
@@ -168,63 +168,121 @@ def cross_val_optim(
         ).mean()

     # objective function for hyperparams tuning
-    [old lines 171-186 not rendered in the diff view]
+    if n_estimators is not None:
+        def crossval_objective(xx):
+            return gbdt_cv(
+                X_train=X_train,
+                y_train=y_train,
+                model_type=model_type,
+                n_estimators=n_estimators,
+                learning_rate=10 ** xx[0],
+                max_depth=int(xx[1]),
+                rowsample=xx[2],
+                colsample=xx[3],
+                cv=cv,
+                n_jobs=n_jobs,
+                type_fit=type_fit,
+                scoring=scoring,
+                seed=seed,
+            )
+    else:  # n_estimators is None
+        def crossval_objective(xx):
+            return gbdt_cv(
+                X_train=X_train,
+                y_train=y_train,
+                model_type=model_type,
+                n_estimators=int(10 ** xx[4]),
+                learning_rate=10 ** xx[0],
+                max_depth=int(xx[1]),
+                rowsample=xx[2],
+                colsample=xx[3],
+                cv=cv,
+                n_jobs=n_jobs,
+                type_fit=type_fit,
+                scoring=scoring,
+                seed=seed,
+            )

-    if
-    [old lines 189-223 not rendered in the diff view]
+    if n_estimators is not None:
+        if surrogate_obj is None:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5]),
+                upper_bound=np.array([0, 16, 1.0, 1.0]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                ],
+                method="bayesian",
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
+        else:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5]),
+                upper_bound=np.array([0, 16, 1.0, 1.0]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                ],
+                acquisition="ucb",
+                method="splitconformal",
+                surrogate_obj=ns.PredictionInterval(
+                    obj=surrogate_obj, method="splitconformal"
+                ),
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
+    else:  # n_estimators is None
+        if surrogate_obj is None:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5, 2]),
+                upper_bound=np.array([0, 16, 1.0, 1.0, 3]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                    "n_estimators"
+                ],
+                method="bayesian",
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
+        else:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5, 2]),
+                upper_bound=np.array([0, 16, 1.0, 1.0, 3]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                    "n_estimators"
+                ],
+                acquisition="ucb",
+                method="splitconformal",
+                surrogate_obj=ns.PredictionInterval(
+                    obj=surrogate_obj, method="splitconformal"
+                ),
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )

     res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
     res.best_params["model_type"] = model_type
-    res.best_params["n_estimators"] = int(n_estimators)
+    res.best_params["n_estimators"] = int(n_estimators) if n_estimators is not None else int(10 ** res.best_params["n_estimators"])
     res.best_params["learning_rate"] = 10 ** res.best_params["learning_rate"]
     res.best_params["max_depth"] = int(res.best_params["max_depth"])
     res.best_params["rowsample"] = res.best_params["rowsample"]
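Aside on the hunk above: when n_estimators is None it becomes a fifth search dimension with bounds [2, 3] on a log10 scale, and both the objective and the final best_params mapping cast it with int(10 ** value), so the tuned tree count effectively ranges over 100 to 1000. A quick sketch of that mapping, using plain NumPy and nothing package-specific:

    import numpy as np

    # GPOpt samples xx[4] between the bounds 2 and 3; the objective and the
    # final best_params mapping both apply int(10 ** value).
    for value in np.linspace(2, 3, 5):
        print(int(10 ** value))  # 100, 177, 316, 562, 1000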
@@ -256,7 +314,7 @@ def lazy_cross_val_optim(
     type_fit="classification",
     scoring="accuracy",
     customize=False,
-    n_estimators=
+    n_estimators=None,
     cv=5,
     n_jobs=None,
     n_init=10,
@@ -299,7 +357,7 @@ def lazy_cross_val_optim(
         if True, the surrogate is transformed into a quasi-randomized network (default is False)

     n_estimators: int
-        maximum number of trees that can be built
+        maximum number of trees that can be built (default is None; if None, the parameter is tuned)

     cv: int;
         number of cross-validation folds
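For illustration, a minimal sketch of calling the updated function with the new default. The X_train/y_train keyword arguments and the import path are assumptions (the opening lines of the signature are not shown in this diff); the remaining parameters all appear in the hunks above:

    from sklearn.datasets import load_breast_cancer
    from unifiedbooster.gpoptimization import cross_val_optim  # assumed import path

    X, y = load_breast_cancer(return_X_y=True)

    # With n_estimators=None (the new default), the number of trees is tuned
    # jointly with learning_rate, max_depth, rowsample and colsample.
    res = cross_val_optim(
        X_train=X,  # assumed keyword; defined before line 18 of the file
        y_train=y,  # assumed keyword; defined before line 18 of the file
        model_type="xgboost",
        type_fit="classification",
        scoring="accuracy",
        n_estimators=None,
        cv=5,
        seed=123,
    )
    print(res.best_params)  # now always contains an integer "n_estimators"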
unifiedbooster/nonconformist/__init__.py
ADDED
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+
+"""
+docstring
+"""
+
+# Authors: Henrik Linusson
+# Yaniv Romano modified np.py file to include CQR
+# T. Moudiki modified __init__.py to import classes
+
+# __version__ = '2.1.0'
+
+from .nc import (
+    AbsErrorErrFunc,
+    QuantileRegErrFunc,
+    RegressorNc,
+    RegressorNormalizer,
+)
+from .cp import IcpRegressor, TcpClassifier
+from .icp import IcpClassifier
+from .base import RegressorAdapter
+
+__all__ = [
+    "AbsErrorErrFunc",
+    "QuantileRegErrFunc",
+    "RegressorAdapter",
+    "RegressorNc",
+    "RegressorNormalizer",
+    "IcpRegressor",
+]
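A quick sketch of the resulting public surface, assuming the vendored submodules (nc.py, cp.py, icp.py, base.py in the file list above) resolve their own imports at import time:

    # Re-exports declared in unifiedbooster/nonconformist/__init__.py above.
    from unifiedbooster.nonconformist import (
        AbsErrorErrFunc,
        QuantileRegErrFunc,
        RegressorAdapter,
        RegressorNc,
        RegressorNormalizer,
        IcpRegressor,
    )

    # IcpClassifier and TcpClassifier are imported by the package but not
    # listed in __all__, so a star-import will not expose them; import
    # them explicitly instead.
    from unifiedbooster.nonconformist import IcpClassifier, TcpClassifier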
unifiedbooster/nonconformist/acp.py
ADDED
@@ -0,0 +1,381 @@
+#!/usr/bin/env python
+
+"""
+Aggregated conformal predictors
+"""
+
+# Authors: Henrik Linusson
+
+import numpy as np
+from sklearn.model_selection import KFold, StratifiedKFold
+from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit
+from sklearn.base import clone
+from nonconformist.base import BaseEstimator
+from nonconformist.util import calc_p
+
+
+# -----------------------------------------------------------------------------
+# Sampling strategies
+# -----------------------------------------------------------------------------
+class BootstrapSampler(object):
+    """Bootstrap sampler.
+
+    See also
+    --------
+    CrossSampler, RandomSubSampler
+
+    Examples
+    --------
+    """
+
+    def gen_samples(self, y, n_samples, problem_type):
+        for i in range(n_samples):
+            idx = np.array(range(y.size))
+            train = np.random.choice(y.size, y.size, replace=True)
+            cal_mask = np.array(np.ones(idx.size), dtype=bool)
+            for j in train:
+                cal_mask[j] = False
+            cal = idx[cal_mask]
+
+            yield train, cal
+
+
+class CrossSampler(object):
+    """Cross-fold sampler.
+
+    See also
+    --------
+    BootstrapSampler, RandomSubSampler
+
+    Examples
+    --------
+    """
+
+    def gen_samples(self, y, n_samples, problem_type):
+        if problem_type == "classification":
+            folds = StratifiedKFold(y, n_folds=n_samples)
+        else:
+            folds = KFold(y.size, n_folds=n_samples)
+        for train, cal in folds:
+            yield train, cal
+
+
+class RandomSubSampler(object):
+    """Random subsample sampler.
+
+    Parameters
+    ----------
+    calibration_portion : float
+        Ratio (0-1) of examples to use for calibration.
+
+    See also
+    --------
+    BootstrapSampler, CrossSampler
+
+    Examples
+    --------
+    """
+
+    def __init__(self, calibration_portion=0.3):
+        self.cal_portion = calibration_portion
+
+    def gen_samples(self, y, n_samples, problem_type):
+        if problem_type == "classification":
+            splits = StratifiedShuffleSplit(
+                y, n_iter=n_samples, test_size=self.cal_portion
+            )
+        else:
+            splits = ShuffleSplit(
+                y.size, n_iter=n_samples, test_size=self.cal_portion
+            )
+
+        for train, cal in splits:
+            yield train, cal
+
+
+# -----------------------------------------------------------------------------
+# Conformal ensemble
+# -----------------------------------------------------------------------------
+class AggregatedCp(BaseEstimator):
+    """Aggregated conformal predictor.
+
+    Combines multiple IcpClassifier or IcpRegressor predictors into an
+    aggregated model.
+
+    Parameters
+    ----------
+    predictor : object
+        Prototype conformal predictor (e.g. IcpClassifier or IcpRegressor)
+        used for defining conformal predictors included in the aggregate model.
+
+    sampler : object
+        Sampler object used to generate training and calibration examples
+        for the underlying conformal predictors.
+
+    aggregation_func : callable
+        Function used to aggregate the predictions of the underlying
+        conformal predictors. Defaults to ``numpy.mean``.
+
+    n_models : int
+        Number of models to aggregate.
+
+    Attributes
+    ----------
+    predictor : object
+        Prototype conformal predictor.
+
+    predictors : list
+        List of underlying conformal predictors.
+
+    sampler : object
+        Sampler object used to generate training and calibration examples.
+
+    agg_func : callable
+        Function used to aggregate the predictions of the underlying
+        conformal predictors
+
+    References
+    ----------
+    .. [1] Vovk, V. (2013). Cross-conformal predictors. Annals of Mathematics
+        and Artificial Intelligence, 1-20.
+
+    .. [2] Carlsson, L., Eklund, M., & Norinder, U. (2014). Aggregated
+        Conformal Prediction. In Artificial Intelligence Applications and
+        Innovations (pp. 231-240). Springer Berlin Heidelberg.
+
+    Examples
+    --------
+    """
+
+    def __init__(
+        self,
+        predictor,
+        sampler=BootstrapSampler(),
+        aggregation_func=None,
+        n_models=10,
+    ):
+        self.predictors = []
+        self.n_models = n_models
+        self.predictor = predictor
+        self.sampler = sampler
+
+        if aggregation_func is not None:
+            self.agg_func = aggregation_func
+        else:
+            self.agg_func = lambda x: np.mean(x, axis=2)
+
+    def fit(self, x, y):
+        """Fit underlying conformal predictors.
+
+        Parameters
+        ----------
+        x : numpy array of shape [n_samples, n_features]
+            Inputs of examples for fitting the underlying conformal predictors.
+
+        y : numpy array of shape [n_samples]
+            Outputs of examples for fitting the underlying conformal predictors.
+
+        Returns
+        -------
+        None
+        """
+        self.n_train = y.size
+        self.predictors = []
+        idx = np.random.permutation(y.size)
+        x, y = x[idx, :], y[idx]
+        problem_type = self.predictor.__class__.get_problem_type()
+        samples = self.sampler.gen_samples(y, self.n_models, problem_type)
+        for train, cal in samples:
+            predictor = clone(self.predictor)
+            predictor.fit(x[train, :], y[train])
+            predictor.calibrate(x[cal, :], y[cal])
+            self.predictors.append(predictor)
+
+        if problem_type == "classification":
+            self.classes = self.predictors[0].classes
+
+    def predict(self, x, significance=None):
+        """Predict the output values for a set of input patterns.
+
+        Parameters
+        ----------
+        x : numpy array of shape [n_samples, n_features]
+            Inputs of patterns for which to predict output values.
+
+        significance : float or None
+            Significance level (maximum allowed error rate) of predictions.
+            Should be a float between 0 and 1. If ``None``, then the p-values
+            are output rather than the predictions. Note: ``significance=None``
+            is applicable to classification problems only.
+
+        Returns
+        -------
+        p : numpy array of shape [n_samples, n_classes] or [n_samples, 2]
+            For classification problems: If significance is ``None``, then p
+            contains the p-values for each sample-class pair; if significance
+            is a float between 0 and 1, then p is a boolean array denoting
+            which labels are included in the prediction sets.
+
+            For regression problems: Prediction interval (minimum and maximum
+            boundaries) for the set of test patterns.
+        """
+        is_regression = (
+            self.predictor.__class__.get_problem_type() == "regression"
+        )
+
+        n_examples = x.shape[0]
+
+        if is_regression and significance is None:
+            signs = np.arange(0.01, 1.0, 0.01)
+            pred = np.zeros((n_examples, 2, signs.size))
+            for i, s in enumerate(signs):
+                predictions = np.dstack(
+                    [p.predict(x, s) for p in self.predictors]
+                )
+                predictions = self.agg_func(predictions)
+                pred[:, :, i] = predictions
+            return pred
+        else:
+
+            def f(p, x):
+                return p.predict(x, significance if is_regression else None)
+
+            predictions = np.dstack([f(p, x) for p in self.predictors])
+            predictions = self.agg_func(predictions)
+
+            if significance and not is_regression:
+                return predictions >= significance
+            else:
+                return predictions
+
+
+class CrossConformalClassifier(AggregatedCp):
+    """Cross-conformal classifier.
+
+    Combines multiple IcpClassifiers into a cross-conformal classifier.
+
+    Parameters
+    ----------
+    predictor : object
+        Prototype conformal predictor (e.g. IcpClassifier or IcpRegressor)
+        used for defining conformal predictors included in the aggregate model.
+
+    aggregation_func : callable
+        Function used to aggregate the predictions of the underlying
+        conformal predictors. Defaults to ``numpy.mean``.
+
+    n_models : int
+        Number of models to aggregate.
+
+    Attributes
+    ----------
+    predictor : object
+        Prototype conformal predictor.
+
+    predictors : list
+        List of underlying conformal predictors.
+
+    sampler : object
+        Sampler object used to generate training and calibration examples.
+
+    agg_func : callable
+        Function used to aggregate the predictions of the underlying
+        conformal predictors
+
+    References
+    ----------
+    .. [1] Vovk, V. (2013). Cross-conformal predictors. Annals of Mathematics
+        and Artificial Intelligence, 1-20.
+
+    Examples
+    --------
+    """
+
+    def __init__(self, predictor, n_models=10):
+        super(CrossConformalClassifier, self).__init__(
+            predictor, CrossSampler(), n_models
+        )
+
+    def predict(self, x, significance=None):
+        ncal_ngt_neq = np.stack(
+            [p._get_stats(x) for p in self.predictors], axis=3
+        )
+        ncal_ngt_neq = ncal_ngt_neq.sum(axis=3)
+
+        p = calc_p(
+            ncal_ngt_neq[:, :, 0],
+            ncal_ngt_neq[:, :, 1],
+            ncal_ngt_neq[:, :, 2],
+            smoothing=self.predictors[0].smoothing,
+        )
+
+        if significance:
+            return p > significance
+        else:
+            return p
+
+
+class BootstrapConformalClassifier(AggregatedCp):
+    """Bootstrap conformal classifier.
+
+    Combines multiple IcpClassifiers into a bootstrap conformal classifier.
+
+    Parameters
+    ----------
+    predictor : object
+        Prototype conformal predictor (e.g. IcpClassifier or IcpRegressor)
+        used for defining conformal predictors included in the aggregate model.
+
+    aggregation_func : callable
+        Function used to aggregate the predictions of the underlying
+        conformal predictors. Defaults to ``numpy.mean``.
+
+    n_models : int
+        Number of models to aggregate.
+
+    Attributes
+    ----------
+    predictor : object
+        Prototype conformal predictor.
+
+    predictors : list
+        List of underlying conformal predictors.
+
+    sampler : object
+        Sampler object used to generate training and calibration examples.
+
+    agg_func : callable
+        Function used to aggregate the predictions of the underlying
+        conformal predictors
+
+    References
+    ----------
+    .. [1] Vovk, V. (2013). Cross-conformal predictors. Annals of Mathematics
+        and Artificial Intelligence, 1-20.
+
+    Examples
+    --------
+    """
+
+    def __init__(self, predictor, n_models=10):
+        super(BootstrapConformalClassifier, self).__init__(
+            predictor, BootstrapSampler(), n_models
+        )
+
+    def predict(self, x, significance=None):
+        ncal_ngt_neq = np.stack(
+            [p._get_stats(x) for p in self.predictors], axis=3
+        )
+        ncal_ngt_neq = ncal_ngt_neq.sum(axis=3)
+
+        p = calc_p(
+            ncal_ngt_neq[:, :, 0] + ncal_ngt_neq[:, :, 0] / self.n_train,
+            ncal_ngt_neq[:, :, 1] + ncal_ngt_neq[:, :, 0] / self.n_train,
+            ncal_ngt_neq[:, :, 2],
+            smoothing=self.predictors[0].smoothing,
+        )
+
+        if significance:
+            return p > significance
+        else:
+            return p
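To make the new sampling strategies concrete, here is a small self-contained sketch using only BootstrapSampler as defined above. Note that acp.py imports from a top-level nonconformist package rather than relatively, so this sketch assumes those imports resolve in your environment:

    import numpy as np
    from unifiedbooster.nonconformist.acp import BootstrapSampler

    y = np.arange(20)  # any 1-D array; only y.size is used here
    sampler = BootstrapSampler()

    for train, cal in sampler.gen_samples(y, n_samples=3, problem_type="regression"):
        # train: y.size indices drawn with replacement (a bootstrap resample);
        # cal: the out-of-bag indices, reserved for conformal calibration.
        assert set(train) | set(cal) == set(range(y.size))
        assert len(set(train)) + len(cal) == y.size
        print(len(set(train)), "unique train indices,", len(cal), "calibration indices")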