unifiedbooster 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unifiedbooster/__init__.py +12 -0
- unifiedbooster/gbdt.py +161 -0
- unifiedbooster/gbdt_classification.py +188 -0
- unifiedbooster/gbdt_regression.py +180 -0
- unifiedbooster/gpoptimization.py +493 -0
- unifiedbooster/nonconformist/__init__.py +36 -0
- unifiedbooster/nonconformist/acp.py +381 -0
- unifiedbooster/nonconformist/base.py +156 -0
- unifiedbooster/nonconformist/cp.py +172 -0
- unifiedbooster/nonconformist/evaluation.py +486 -0
- unifiedbooster/nonconformist/icp.py +442 -0
- unifiedbooster/nonconformist/nc.py +610 -0
- unifiedbooster/nonconformist/util.py +9 -0
- unifiedbooster/predictioninterval/__init__.py +3 -0
- unifiedbooster/predictioninterval/predictioninterval.py +314 -0
- unifiedbooster/predictionset/__init__.py +3 -0
- unifiedbooster/predictionset/predictionset.py +113 -0
- unifiedbooster-0.9.0.dist-info/METADATA +39 -0
- unifiedbooster-0.9.0.dist-info/RECORD +23 -0
- unifiedbooster-0.9.0.dist-info/WHEEL +5 -0
- unifiedbooster-0.9.0.dist-info/entry_points.txt +2 -0
- unifiedbooster-0.9.0.dist-info/licenses/LICENSE +7 -0
- unifiedbooster-0.9.0.dist-info/top_level.txt +1 -0

unifiedbooster/nonconformist/acp.py
@@ -0,0 +1,381 @@
+#!/usr/bin/env python
+
+"""
+Aggregated conformal predictors
+"""
+
+# Authors: Henrik Linusson
+
+import numpy as np
+from sklearn.model_selection import KFold, StratifiedKFold
+from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit
+from sklearn.base import clone
+from nonconformist.base import BaseEstimator
+from nonconformist.util import calc_p
+
+
+# -----------------------------------------------------------------------------
+# Sampling strategies
+# -----------------------------------------------------------------------------
+class BootstrapSampler(object):
+    """Bootstrap sampler.
+
+    See also
+    --------
+    CrossSampler, RandomSubSampler
+
+    Examples
+    --------
+    """
+
+    def gen_samples(self, y, n_samples, problem_type):
+        for i in range(n_samples):
+            idx = np.array(range(y.size))
+            train = np.random.choice(y.size, y.size, replace=True)
+            cal_mask = np.array(np.ones(idx.size), dtype=bool)
+            for j in train:
+                cal_mask[j] = False
+            cal = idx[cal_mask]
+
+            yield train, cal
+
+
+class CrossSampler(object):
+    """Cross-fold sampler.
+
+    See also
+    --------
+    BootstrapSampler, RandomSubSampler
+
+    Examples
+    --------
+    """
+
+    def gen_samples(self, y, n_samples, problem_type):
+        if problem_type == "classification":
+            folds = StratifiedKFold(y, n_folds=n_samples)
+        else:
+            folds = KFold(y.size, n_folds=n_samples)
+        for train, cal in folds:
+            yield train, cal
+
+
+class RandomSubSampler(object):
+    """Random subsample sampler.
+
+    Parameters
+    ----------
+    calibration_portion : float
+        Ratio (0-1) of examples to use for calibration.
+
+    See also
+    --------
+    BootstrapSampler, CrossSampler
+
+    Examples
+    --------
+    """
+
+    def __init__(self, calibration_portion=0.3):
+        self.cal_portion = calibration_portion
+
+    def gen_samples(self, y, n_samples, problem_type):
+        if problem_type == "classification":
+            splits = StratifiedShuffleSplit(
+                y, n_iter=n_samples, test_size=self.cal_portion
+            )
+        else:
+            splits = ShuffleSplit(
+                y.size, n_iter=n_samples, test_size=self.cal_portion
+            )
+
+        for train, cal in splits:
+            yield train, cal
+
+
+# -----------------------------------------------------------------------------
+# Conformal ensemble
+# -----------------------------------------------------------------------------
+class AggregatedCp(BaseEstimator):
+    """Aggregated conformal predictor.
+
+    Combines multiple IcpClassifier or IcpRegressor predictors into an
+    aggregated model.
+
+    Parameters
+    ----------
+    predictor : object
+        Prototype conformal predictor (e.g. IcpClassifier or IcpRegressor)
+        used for defining conformal predictors included in the aggregate model.
+
+    sampler : object
+        Sampler object used to generate training and calibration examples
+        for the underlying conformal predictors.
+
+    aggregation_func : callable
+        Function used to aggregate the predictions of the underlying
+        conformal predictors. Defaults to ``numpy.mean``.
+
+    n_models : int
+        Number of models to aggregate.
+
+    Attributes
+    ----------
+    predictor : object
+        Prototype conformal predictor.
+
+    predictors : list
+        List of underlying conformal predictors.
+
+    sampler : object
+        Sampler object used to generate training and calibration examples.
+
+    agg_func : callable
+        Function used to aggregate the predictions of the underlying
+        conformal predictors
+
+    References
+    ----------
+    .. [1] Vovk, V. (2013). Cross-conformal predictors. Annals of Mathematics
+        and Artificial Intelligence, 1-20.
+
+    .. [2] Carlsson, L., Eklund, M., & Norinder, U. (2014). Aggregated
+        Conformal Prediction. In Artificial Intelligence Applications and
+        Innovations (pp. 231-240). Springer Berlin Heidelberg.
+
+    Examples
+    --------
+    """
+
+    def __init__(
+        self,
+        predictor,
+        sampler=BootstrapSampler(),
+        aggregation_func=None,
+        n_models=10,
+    ):
+        self.predictors = []
+        self.n_models = n_models
+        self.predictor = predictor
+        self.sampler = sampler
+
+        if aggregation_func is not None:
+            self.agg_func = aggregation_func
+        else:
+            self.agg_func = lambda x: np.mean(x, axis=2)
+
+    def fit(self, x, y):
+        """Fit underlying conformal predictors.
+
+        Parameters
+        ----------
+        x : numpy array of shape [n_samples, n_features]
+            Inputs of examples for fitting the underlying conformal predictors.
+
+        y : numpy array of shape [n_samples]
+            Outputs of examples for fitting the underlying conformal predictors.
+
+        Returns
+        -------
+        None
+        """
+        self.n_train = y.size
+        self.predictors = []
+        idx = np.random.permutation(y.size)
+        x, y = x[idx, :], y[idx]
+        problem_type = self.predictor.__class__.get_problem_type()
+        samples = self.sampler.gen_samples(y, self.n_models, problem_type)
+        for train, cal in samples:
+            predictor = clone(self.predictor)
+            predictor.fit(x[train, :], y[train])
+            predictor.calibrate(x[cal, :], y[cal])
+            self.predictors.append(predictor)
+
+        if problem_type == "classification":
+            self.classes = self.predictors[0].classes
+
+    def predict(self, x, significance=None):
+        """Predict the output values for a set of input patterns.
+
+        Parameters
+        ----------
+        x : numpy array of shape [n_samples, n_features]
+            Inputs of patters for which to predict output values.
+
+        significance : float or None
+            Significance level (maximum allowed error rate) of predictions.
+            Should be a float between 0 and 1. If ``None``, then the p-values
+            are output rather than the predictions. Note: ``significance=None``
+            is applicable to classification problems only.
+
+        Returns
+        -------
+        p : numpy array of shape [n_samples, n_classes] or [n_samples, 2]
+            For classification problems: If significance is ``None``, then p
+            contains the p-values for each sample-class pair; if significance
+            is a float between 0 and 1, then p is a boolean array denoting
+            which labels are included in the prediction sets.
+
+            For regression problems: Prediction interval (minimum and maximum
+            boundaries) for the set of test patterns.
+        """
+        is_regression = (
+            self.predictor.__class__.get_problem_type() == "regression"
+        )
+
+        n_examples = x.shape[0]
+
+        if is_regression and significance is None:
+            signs = np.arange(0.01, 1.0, 0.01)
+            pred = np.zeros((n_examples, 2, signs.size))
+            for i, s in enumerate(signs):
+                predictions = np.dstack(
+                    [p.predict(x, s) for p in self.predictors]
+                )
+                predictions = self.agg_func(predictions)
+                pred[:, :, i] = predictions
+            return pred
+        else:
+
+            def f(p, x):
+                return p.predict(x, significance if is_regression else None)
+
+            predictions = np.dstack([f(p, x) for p in self.predictors])
+            predictions = self.agg_func(predictions)
+
+            if significance and not is_regression:
+                return predictions >= significance
+            else:
+                return predictions
+
+
+class CrossConformalClassifier(AggregatedCp):
+    """Cross-conformal classifier.
+
+    Combines multiple IcpClassifiers into a cross-conformal classifier.
+
+    Parameters
+    ----------
+    predictor : object
+        Prototype conformal predictor (e.g. IcpClassifier or IcpRegressor)
+        used for defining conformal predictors included in the aggregate model.
+
+    aggregation_func : callable
+        Function used to aggregate the predictions of the underlying
+        conformal predictors. Defaults to ``numpy.mean``.
+
+    n_models : int
+        Number of models to aggregate.
+
+    Attributes
+    ----------
+    predictor : object
+        Prototype conformal predictor.
+
+    predictors : list
+        List of underlying conformal predictors.
+
+    sampler : object
+        Sampler object used to generate training and calibration examples.
+
+    agg_func : callable
+        Function used to aggregate the predictions of the underlying
+        conformal predictors
+
+    References
+    ----------
+    .. [1] Vovk, V. (2013). Cross-conformal predictors. Annals of Mathematics
+        and Artificial Intelligence, 1-20.
+
+    Examples
+    --------
+    """
+
+    def __init__(self, predictor, n_models=10):
+        super(CrossConformalClassifier, self).__init__(
+            predictor, CrossSampler(), n_models
+        )
+
+    def predict(self, x, significance=None):
+        ncal_ngt_neq = np.stack(
+            [p._get_stats(x) for p in self.predictors], axis=3
+        )
+        ncal_ngt_neq = ncal_ngt_neq.sum(axis=3)
+
+        p = calc_p(
+            ncal_ngt_neq[:, :, 0],
+            ncal_ngt_neq[:, :, 1],
+            ncal_ngt_neq[:, :, 2],
+            smoothing=self.predictors[0].smoothing,
+        )
+
+        if significance:
+            return p > significance
+        else:
+            return p
+
+
+class BootstrapConformalClassifier(AggregatedCp):
+    """Bootstrap conformal classifier.
+
+    Combines multiple IcpClassifiers into a bootstrap conformal classifier.
+
+    Parameters
+    ----------
+    predictor : object
+        Prototype conformal predictor (e.g. IcpClassifier or IcpRegressor)
+        used for defining conformal predictors included in the aggregate model.
+
+    aggregation_func : callable
+        Function used to aggregate the predictions of the underlying
+        conformal predictors. Defaults to ``numpy.mean``.
+
+    n_models : int
+        Number of models to aggregate.
+
+    Attributes
+    ----------
+    predictor : object
+        Prototype conformal predictor.
+
+    predictors : list
+        List of underlying conformal predictors.
+
+    sampler : object
+        Sampler object used to generate training and calibration examples.
+
+    agg_func : callable
+        Function used to aggregate the predictions of the underlying
+        conformal predictors
+
+    References
+    ----------
+    .. [1] Vovk, V. (2013). Cross-conformal predictors. Annals of Mathematics
+        and Artificial Intelligence, 1-20.
+
+    Examples
+    --------
+    """
+
+    def __init__(self, predictor, n_models=10):
+        super(BootstrapConformalClassifier, self).__init__(
+            predictor, BootstrapSampler(), n_models
+        )
+
+    def predict(self, x, significance=None):
+        ncal_ngt_neq = np.stack(
+            [p._get_stats(x) for p in self.predictors], axis=3
+        )
+        ncal_ngt_neq = ncal_ngt_neq.sum(axis=3)
+
+        p = calc_p(
+            ncal_ngt_neq[:, :, 0] + ncal_ngt_neq[:, :, 0] / self.n_train,
+            ncal_ngt_neq[:, :, 1] + ncal_ngt_neq[:, :, 0] / self.n_train,
+            ncal_ngt_neq[:, :, 2],
+            smoothing=self.predictors[0].smoothing,
+        )

+        if significance:
+            return p > significance
+        else:
+            return p
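
Editor's note on the hunk above: acp.py builds ensembles of inductive conformal predictors. AggregatedCp clones the prototype ICP once per resample, fits each clone on the sampled rows, calibrates it on the held-out rows, and aggregates the resulting p-values (by default with numpy.mean over the ensemble axis). Note that CrossSampler and RandomSubSampler still use the pre-0.18 sklearn.cross_validation call style (n_folds=, n_iter=, iterating over the splitter object directly), which the sklearn.model_selection classes imported at the top of the file do not accept; BootstrapSampler does not depend on scikit-learn splitters. Below is a minimal usage sketch, not taken from the package: it assumes the vendored modules keep the upstream nonconformist layout (IcpClassifier in icp.py, ClassifierNc/MarginErrFunc in nc.py) and are importable under the paths the code's own imports use; inside this wheel the copy ships as unifiedbooster.nonconformist, so the import prefix may need adjusting.

import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

from nonconformist.base import ClassifierAdapter          # wraps predict_proba
from nonconformist.icp import IcpClassifier               # assumed location (upstream layout)
from nonconformist.nc import ClassifierNc, MarginErrFunc  # assumed location (upstream layout)
from nonconformist.acp import AggregatedCp, BootstrapSampler

iris = load_iris()
idx = np.random.permutation(iris.target.size)
train, test = idx[: idx.size // 2], idx[idx.size // 2 :]

# Prototype ICP; AggregatedCp clones it n_models times, fitting each clone on a
# bootstrap sample and calibrating it on the corresponding out-of-bag rows.
icp = IcpClassifier(
    ClassifierNc(ClassifierAdapter(RandomForestClassifier()), MarginErrFunc())
)
acp = AggregatedCp(icp, sampler=BootstrapSampler(), n_models=10)
acp.fit(iris.data[train, :], iris.target[train])

p_values = acp.predict(iris.data[test, :])                     # aggregated p-values, shape (n_test, n_classes)
pred_sets = acp.predict(iris.data[test, :], significance=0.1)  # boolean prediction sets at the 10% level
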
unifiedbooster/nonconformist/base.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+
+"""
+docstring
+"""
+
+# Authors: Henrik Linusson
+
+import abc
+import numpy as np
+
+from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin
+
+
+class RegressorMixin(object):
+    def __init__(self):
+        super(RegressorMixin, self).__init__()
+
+    @classmethod
+    def get_problem_type(cls):
+        return "regression"
+
+
+class ClassifierMixin(object):
+    def __init__(self):
+        super(ClassifierMixin, self).__init__()
+
+    @classmethod
+    def get_problem_type(cls):
+        return "classification"
+
+
+class BaseModelAdapter(BaseEstimator):
+    __metaclass__ = abc.ABCMeta
+
+    def __init__(self, model, fit_params=None):
+        super(BaseModelAdapter, self).__init__()
+
+        self.model = model
+        self.last_x, self.last_y = None, None
+        self.clean = False
+        self.fit_params = {} if fit_params is None else fit_params
+
+    def fit(self, x, y):
+        """Fits the model.
+
+        Parameters
+        ----------
+        x : numpy array of shape [n_samples, n_features]
+            Inputs of examples for fitting the model.
+
+        y : numpy array of shape [n_samples]
+            Outputs of examples for fitting the model.
+
+        Returns
+        -------
+        None
+        """
+
+        self.model.fit(x, y, **self.fit_params)
+        self.clean = False
+
+    def predict(self, x):
+        """Returns the prediction made by the underlying model.
+
+        Parameters
+        ----------
+        x : numpy array of shape [n_samples, n_features]
+            Inputs of test examples.
+
+        Returns
+        -------
+        y : numpy array of shape [n_samples]
+            Predicted outputs of test examples.
+        """
+        if (
+            not self.clean
+            or self.last_x is None
+            or self.last_y is None
+            or not np.array_equal(self.last_x, x)
+        ):
+            self.last_x = x
+            self.last_y = self._underlying_predict(x)
+            self.clean = True
+
+        return self.last_y.copy()
+
+    @abc.abstractmethod
+    def _underlying_predict(self, x):
+        """Produces a prediction using the encapsulated model.
+
+        Parameters
+        ----------
+        x : numpy array of shape [n_samples, n_features]
+            Inputs of test examples.
+
+        Returns
+        -------
+        y : numpy array of shape [n_samples]
+            Predicted outputs of test examples.
+        """
+        pass
+
+
+class ClassifierAdapter(BaseModelAdapter, ClassifierMixin):
+    def __init__(self, model, fit_params=None):
+        super(ClassifierAdapter, self).__init__(model, fit_params)
+
+    def _underlying_predict(self, x):
+        return self.model.predict_proba(x)
+
+
+class RegressorAdapter(BaseModelAdapter, RegressorMixin):
+    def __init__(self, model, fit_params=None):
+        super(RegressorAdapter, self).__init__(model, fit_params)
+
+    def _underlying_predict(self, x):
+        return self.model.predict(x)
+
+
+class OobMixin(object):
+    def __init__(self, model, fit_params=None):
+        super(OobMixin, self).__init__(model, fit_params)
+        self.train_x = None
+
+    def fit(self, x, y):
+        super(OobMixin, self).fit(x, y)
+        self.train_x = x
+
+    def _underlying_predict(self, x):
+        # TODO: sub-sampling of ensemble for test patterns
+        oob = x == self.train_x
+
+        if hasattr(oob, "all"):
+            oob = oob.all()
+
+        if oob:
+            return self._oob_prediction()
+        else:
+            return super(OobMixin, self)._underlying_predict(x)
+
+
+class OobClassifierAdapter(OobMixin, ClassifierAdapter):
+    def __init__(self, model, fit_params=None):
+        super(OobClassifierAdapter, self).__init__(model, fit_params)
+
+    def _oob_prediction(self):
+        return self.model.oob_decision_function_
+
+
+class OobRegressorAdapter(OobMixin, RegressorAdapter):
+    def __init__(self, model, fit_params=None):
+        super(OobRegressorAdapter, self).__init__(model, fit_params)
+
+    def _oob_prediction(self):
+        return self.model.oob_prediction_
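
Editor's note on the hunk above: base.py is the adapter layer the nonconformity scorers build on. ClassifierAdapter.predict returns predict_proba output, RegressorAdapter.predict returns point predictions, BaseModelAdapter caches the last prediction so repeated calls on identical inputs are cheap, and the Oob* adapters return the estimator's out-of-bag statistics whenever predict() is called on the exact training matrix. A short illustrative sketch follows (not part of the package); it assumes the upstream import path, and it passes oob_score=True because scikit-learn forests only expose oob_decision_function_ when that flag is set.

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

from nonconformist.base import ClassifierAdapter, OobClassifierAdapter

X, y = load_iris(return_X_y=True)

plain = ClassifierAdapter(RandomForestClassifier(n_estimators=200))
plain.fit(X, y)
probs = plain.predict(X)      # delegates to predict_proba; result cached via self.clean

oob = OobClassifierAdapter(RandomForestClassifier(n_estimators=200, oob_score=True))
oob.fit(X, y)                 # OobMixin.fit also stores train_x
oob_probs = oob.predict(X)    # X matches the stored train_x -> oob_decision_function_
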
unifiedbooster/nonconformist/cp.py
@@ -0,0 +1,172 @@
+from .icp import *
+
+# TODO: move contents from nonconformist.icp here
+
+
+# -----------------------------------------------------------------------------
+# TcpClassifier
+# -----------------------------------------------------------------------------
+class TcpClassifier(BaseEstimator, ClassifierMixin):
+    """Transductive conformal classifier.
+
+    Parameters
+    ----------
+    nc_function : BaseScorer
+        Nonconformity scorer object used to calculate nonconformity of
+        calibration examples and test patterns. Should implement ``fit(x, y)``
+        and ``calc_nc(x, y)``.
+
+    smoothing : boolean
+        Decides whether to use stochastic smoothing of p-values.
+
+    Attributes
+    ----------
+    train_x : numpy array of shape [n_cal_examples, n_features]
+        Inputs of training set.
+
+    train_y : numpy array of shape [n_cal_examples]
+        Outputs of calibration set.
+
+    nc_function : BaseScorer
+        Nonconformity scorer object used to calculate nonconformity scores.
+
+    classes : numpy array of shape [n_classes]
+        List of class labels, with indices corresponding to output columns
+        of TcpClassifier.predict()
+
+    See also
+    --------
+    IcpClassifier
+
+    References
+    ----------
+    .. [1] Vovk, V., Gammerman, A., & Shafer, G. (2005). Algorithmic learning
+        in a random world. Springer Science & Business Media.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn.datasets import load_iris
+    >>> from sklearn.svm import SVC
+    >>> from nonconformist.base import ClassifierAdapter
+    >>> from nonconformist.cp import TcpClassifier
+    >>> from nonconformist.nc import ClassifierNc, MarginErrFunc
+    >>> iris = load_iris()
+    >>> idx = np.random.permutation(iris.target.size)
+    >>> train = idx[:int(idx.size / 2)]
+    >>> test = idx[int(idx.size / 2):]
+    >>> model = ClassifierAdapter(SVC(probability=True))
+    >>> nc = ClassifierNc(model, MarginErrFunc())
+    >>> tcp = TcpClassifier(nc)
+    >>> tcp.fit(iris.data[train, :], iris.target[train])
+    >>> tcp.predict(iris.data[test, :], significance=0.10)
+    ... # doctest: +SKIP
+    array([[ True, False, False],
+        [False, True, False],
+        ...,
+        [False, True, False],
+        [False, True, False]], dtype=bool)
+    """
+
+    def __init__(self, nc_function, condition=None, smoothing=True):
+        self.train_x, self.train_y = None, None
+        self.nc_function = nc_function
+        super(TcpClassifier, self).__init__()
+
+        # Check if condition-parameter is the default function (i.e.,
+        # lambda x: 0). This is so we can safely clone the object without
+        # the clone accidentally having self.conditional = True.
+        def default_condition(x):
+            return 0
+
+        is_default = callable(condition) and (
+            condition.__code__.co_code == default_condition.__code__.co_code
+        )
+
+        if is_default:
+            self.condition = condition
+            self.conditional = False
+        elif callable(condition):
+            self.condition = condition
+            self.conditional = True
+        else:
+            self.condition = lambda x: 0
+            self.conditional = False
+
+        self.smoothing = smoothing
+
+        self.base_icp = IcpClassifier(
+            self.nc_function, self.condition, self.smoothing
+        )
+
+        self.classes = None
+
+    def fit(self, x, y):
+        self.train_x, self.train_y = x, y
+        self.classes = np.unique(y)
+
+    def predict(self, x, significance=None):
+        """Predict the output values for a set of input patterns.
+
+        Parameters
+        ----------
+        x : numpy array of shape [n_samples, n_features]
+            Inputs of patters for which to predict output values.
+
+        significance : float or None
+            Significance level (maximum allowed error rate) of predictions.
+            Should be a float between 0 and 1. If ``None``, then the p-values
+            are output rather than the predictions.
+
+        Returns
+        -------
+        p : numpy array of shape [n_samples, n_classes]
+            If significance is ``None``, then p contains the p-values for each
+            sample-class pair; if significance is a float between 0 and 1, then
+            p is a boolean array denoting which labels are included in the
+            prediction sets.
+        """
+        n_test = x.shape[0]
+        n_train = self.train_x.shape[0]
+        p = np.zeros((n_test, self.classes.size))
+        for i in range(n_test):
+            for j, y in enumerate(self.classes):
+                train_x = np.vstack([self.train_x, x[i, :]])
+                train_y = np.hstack([self.train_y, y])
+                self.base_icp.fit(train_x, train_y)
+                scores = self.base_icp.nc_function.score(train_x, train_y)
+                ngt = (scores[:-1] > scores[-1]).sum()
+                neq = (scores[:-1] == scores[-1]).sum()
+
+                p[i, j] = calc_p(n_train, ngt, neq, self.smoothing)
+
+        if significance is not None:
+            return p > significance
+        else:
+            return p
+
+    def predict_conf(self, x):
+        """Predict the output values for a set of input patterns, using
+        the confidence-and-credibility output scheme.
+
+        Parameters
+        ----------
+        x : numpy array of shape [n_samples, n_features]
+            Inputs of patters for which to predict output values.
+
+        Returns
+        -------
+        p : numpy array of shape [n_samples, 3]
+            p contains three columns: the first column contains the most
+            likely class for each test pattern; the second column contains
+            the confidence in the predicted class label, and the third column
+            contains the credibility of the prediction.
+        """
+        p = self.predict(x, significance=None)
+        label = p.argmax(axis=1)
+        credibility = p.max(axis=1)
+        for i, idx in enumerate(label):
+            p[i, idx] = -np.inf
+        confidence = 1 - p.max(axis=1)
+
+        return np.array([label, confidence, credibility]).T