unifiedbooster 0.4.2__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,381 @@
#!/usr/bin/env python

"""
Aggregated conformal predictors
"""

# Authors: Henrik Linusson

import numpy as np
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit
from sklearn.base import clone
from nonconformist.base import BaseEstimator
from nonconformist.util import calc_p


# -----------------------------------------------------------------------------
# Sampling strategies
# -----------------------------------------------------------------------------
class BootstrapSampler(object):
    """Bootstrap sampler.

    See also
    --------
    CrossSampler, RandomSubSampler

    Examples
    --------
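    A minimal sketch (assuming this module is importable as
    ``nonconformist.acp``):

    >>> import numpy as np
    >>> from nonconformist.acp import BootstrapSampler
    >>> y = np.array([0, 1, 0, 1, 0, 1, 0, 1])
    >>> sampler = BootstrapSampler()
    >>> for train, cal in sampler.gen_samples(y, n_samples=2,
    ...                                       problem_type='classification'):
    ...     print(train.size, cal.size)  # doctest: +SKIP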
    """

    def gen_samples(self, y, n_samples, problem_type):
        for i in range(n_samples):
            idx = np.arange(y.size)
            # Draw a bootstrap training sample with replacement; the
            # examples never drawn (the out-of-bag examples) form the
            # calibration set.
            train = np.random.choice(y.size, y.size, replace=True)
            cal_mask = np.ones(idx.size, dtype=bool)
            cal_mask[train] = False
            cal = idx[cal_mask]

            yield train, cal


class CrossSampler(object):
    """Cross-fold sampler.

    See also
    --------
    BootstrapSampler, RandomSubSampler

    Examples
    --------
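    A minimal sketch (assuming this module is importable as
    ``nonconformist.acp``); each fold's held-out part becomes the
    calibration set:

    >>> import numpy as np
    >>> from nonconformist.acp import CrossSampler
    >>> y = np.array([0, 1] * 10)
    >>> sampler = CrossSampler()
    >>> for train, cal in sampler.gen_samples(y, n_samples=5,
    ...                                       problem_type='classification'):
    ...     print(train.size, cal.size)  # doctest: +SKIP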
    """

    def gen_samples(self, y, n_samples, problem_type):
        # Use the modern scikit-learn splitter API: construct the splitter,
        # then call split() to obtain (train, calibration) index pairs.
        if problem_type == "classification":
            folds = StratifiedKFold(n_splits=n_samples)
            split = folds.split(np.zeros(y.size), y)
        else:
            folds = KFold(n_splits=n_samples)
            split = folds.split(np.zeros(y.size))
        for train, cal in split:
            yield train, cal


class RandomSubSampler(object):
    """Random subsample sampler.

    Parameters
    ----------
    calibration_portion : float
        Ratio (0-1) of examples to use for calibration.

    See also
    --------
    BootstrapSampler, CrossSampler

    Examples
    --------
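    A minimal sketch (assuming this module is importable as
    ``nonconformist.acp``):

    >>> import numpy as np
    >>> from nonconformist.acp import RandomSubSampler
    >>> y = np.array([0, 1] * 10)
    >>> sampler = RandomSubSampler(calibration_portion=0.25)
    >>> for train, cal in sampler.gen_samples(y, n_samples=3,
    ...                                       problem_type='classification'):
    ...     print(train.size, cal.size)  # doctest: +SKIP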
    """

    def __init__(self, calibration_portion=0.3):
        self.cal_portion = calibration_portion

    def gen_samples(self, y, n_samples, problem_type):
        # Use the modern scikit-learn splitter API (n_splits rather than
        # the long-deprecated n_iter constructor argument).
        if problem_type == "classification":
            splits = StratifiedShuffleSplit(
                n_splits=n_samples, test_size=self.cal_portion
            )
            split = splits.split(np.zeros(y.size), y)
        else:
            splits = ShuffleSplit(
                n_splits=n_samples, test_size=self.cal_portion
            )
            split = splits.split(np.zeros(y.size))

        for train, cal in split:
            yield train, cal


# -----------------------------------------------------------------------------
# Conformal ensemble
# -----------------------------------------------------------------------------
class AggregatedCp(BaseEstimator):
    """Aggregated conformal predictor.

    Combines multiple IcpClassifier or IcpRegressor predictors into an
    aggregated model.

    Parameters
    ----------
    predictor : object
        Prototype conformal predictor (e.g. IcpClassifier or IcpRegressor)
        used for defining conformal predictors included in the aggregate model.

    sampler : object
        Sampler object used to generate training and calibration examples
        for the underlying conformal predictors.

    aggregation_func : callable
        Function used to aggregate the predictions of the underlying
        conformal predictors. Defaults to ``numpy.mean`` applied over the
        underlying models.

    n_models : int
        Number of models to aggregate.

    Attributes
    ----------
    predictor : object
        Prototype conformal predictor.

    predictors : list
        List of underlying conformal predictors.

    sampler : object
        Sampler object used to generate training and calibration examples.

    agg_func : callable
        Function used to aggregate the predictions of the underlying
        conformal predictors.

    References
    ----------
    .. [1] Vovk, V. (2013). Cross-conformal predictors. Annals of Mathematics
        and Artificial Intelligence, 1-20.

    .. [2] Carlsson, L., Eklund, M., & Norinder, U. (2014). Aggregated
        Conformal Prediction. In Artificial Intelligence Applications and
        Innovations (pp. 231-240). Springer Berlin Heidelberg.

    Examples
    --------
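    A minimal sketch, modeled on the ``TcpClassifier`` example further down
    (assuming this module is importable as ``nonconformist.acp`` and that
    ``IcpClassifier`` lives in ``nonconformist.icp``):

    >>> from sklearn.datasets import load_iris
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from nonconformist.base import ClassifierAdapter
    >>> from nonconformist.icp import IcpClassifier
    >>> from nonconformist.nc import ClassifierNc, MarginErrFunc
    >>> from nonconformist.acp import AggregatedCp, BootstrapSampler
    >>> iris = load_iris()
    >>> icp = IcpClassifier(
    ...     ClassifierNc(ClassifierAdapter(RandomForestClassifier()),
    ...                  MarginErrFunc())
    ... )
    >>> acp = AggregatedCp(icp, sampler=BootstrapSampler(), n_models=5)
    >>> acp.fit(iris.data, iris.target)
    >>> acp.predict(iris.data, significance=0.1)  # doctest: +SKIP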
    """

    def __init__(
        self,
        predictor,
        sampler=BootstrapSampler(),
        aggregation_func=None,
        n_models=10,
    ):
        self.predictors = []
        self.n_models = n_models
        self.predictor = predictor
        self.sampler = sampler

        if aggregation_func is not None:
            self.agg_func = aggregation_func
        else:
            # By default, average the stacked predictions over the model axis.
            self.agg_func = lambda x: np.mean(x, axis=2)

    def fit(self, x, y):
        """Fit underlying conformal predictors.

        Parameters
        ----------
        x : numpy array of shape [n_samples, n_features]
            Inputs of examples for fitting the underlying conformal predictors.

        y : numpy array of shape [n_samples]
            Outputs of examples for fitting the underlying conformal predictors.

        Returns
        -------
        None
        """
        self.n_train = y.size
        self.predictors = []
        # Shuffle the examples so that ordered data sets do not produce
        # systematically skewed folds or subsamples.
        idx = np.random.permutation(y.size)
        x, y = x[idx, :], y[idx]
        problem_type = self.predictor.__class__.get_problem_type()
        samples = self.sampler.gen_samples(y, self.n_models, problem_type)
        for train, cal in samples:
            predictor = clone(self.predictor)
            predictor.fit(x[train, :], y[train])
            predictor.calibrate(x[cal, :], y[cal])
            self.predictors.append(predictor)

        if problem_type == "classification":
            self.classes = self.predictors[0].classes

    def predict(self, x, significance=None):
        """Predict the output values for a set of input patterns.

        Parameters
        ----------
        x : numpy array of shape [n_samples, n_features]
            Inputs of patterns for which to predict output values.

        significance : float or None
            Significance level (maximum allowed error rate) of predictions.
            Should be a float between 0 and 1. If ``None``, then p-values
            are output for classification problems, while prediction
            intervals for each significance level 0.01, 0.02, ..., 0.99
            are output for regression problems.

        Returns
        -------
        p : numpy array of shape [n_samples, n_classes] or [n_samples, 2]
            For classification problems: If significance is ``None``, then p
            contains the p-values for each sample-class pair; if significance
            is a float between 0 and 1, then p is a boolean array denoting
            which labels are included in the prediction sets.

            For regression problems: Prediction interval (minimum and maximum
            boundaries) for the set of test patterns; if significance is
            ``None``, the intervals for all 99 significance levels are
            stacked along a third axis, giving shape [n_samples, 2, 99].
        """
        is_regression = (
            self.predictor.__class__.get_problem_type() == "regression"
        )

        n_examples = x.shape[0]

        if is_regression and significance is None:
            signs = np.arange(0.01, 1.0, 0.01)
            pred = np.zeros((n_examples, 2, signs.size))
            for i, s in enumerate(signs):
                predictions = np.dstack(
                    [p.predict(x, s) for p in self.predictors]
                )
                predictions = self.agg_func(predictions)
                pred[:, :, i] = predictions
            return pred
        else:

            def f(p, x):
                return p.predict(x, significance if is_regression else None)

            predictions = np.dstack([f(p, x) for p in self.predictors])
            predictions = self.agg_func(predictions)

            if significance and not is_regression:
                return predictions >= significance
            else:
                return predictions


class CrossConformalClassifier(AggregatedCp):
    """Cross-conformal classifier.

    Combines multiple IcpClassifiers into a cross-conformal classifier.

    Parameters
    ----------
    predictor : object
        Prototype conformal classifier (e.g. IcpClassifier) used for
        defining conformal classifiers included in the aggregate model.

    n_models : int
        Number of models to aggregate.

    Attributes
    ----------
    predictor : object
        Prototype conformal predictor.

    predictors : list
        List of underlying conformal predictors.

    sampler : object
        Sampler object used to generate training and calibration examples.

    agg_func : callable
        Function used to aggregate the predictions of the underlying
        conformal predictors.

    References
    ----------
    .. [1] Vovk, V. (2013). Cross-conformal predictors. Annals of Mathematics
        and Artificial Intelligence, 1-20.

    Examples
    --------
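    A minimal sketch (same assumptions as the ``AggregatedCp`` example
    above):

    >>> from sklearn.datasets import load_iris
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from nonconformist.base import ClassifierAdapter
    >>> from nonconformist.icp import IcpClassifier
    >>> from nonconformist.nc import ClassifierNc, MarginErrFunc
    >>> from nonconformist.acp import CrossConformalClassifier
    >>> iris = load_iris()
    >>> icp = IcpClassifier(
    ...     ClassifierNc(ClassifierAdapter(RandomForestClassifier()),
    ...                  MarginErrFunc())
    ... )
    >>> ccp = CrossConformalClassifier(icp, n_models=5)
    >>> ccp.fit(iris.data, iris.target)
    >>> ccp.predict(iris.data, significance=0.1)  # doctest: +SKIP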
    """

    def __init__(self, predictor, n_models=10):
        # Pass n_models by keyword: the third positional parameter of
        # AggregatedCp.__init__ is aggregation_func, not n_models.
        super(CrossConformalClassifier, self).__init__(
            predictor, CrossSampler(), n_models=n_models
        )

    def predict(self, x, significance=None):
        # Pool the calibration statistics (ncal, ngt, neq) of all underlying
        # predictors, then compute a single set of p-values from the pooled
        # counts.
        ncal_ngt_neq = np.stack(
            [p._get_stats(x) for p in self.predictors], axis=3
        )
        ncal_ngt_neq = ncal_ngt_neq.sum(axis=3)

        p = calc_p(
            ncal_ngt_neq[:, :, 0],
            ncal_ngt_neq[:, :, 1],
            ncal_ngt_neq[:, :, 2],
            smoothing=self.predictors[0].smoothing,
        )

        if significance:
            return p > significance
        else:
            return p


class BootstrapConformalClassifier(AggregatedCp):
    """Bootstrap conformal classifier.

    Combines multiple IcpClassifiers into a bootstrap conformal classifier.

    Parameters
    ----------
    predictor : object
        Prototype conformal classifier (e.g. IcpClassifier) used for
        defining conformal classifiers included in the aggregate model.

    n_models : int
        Number of models to aggregate.

    Attributes
    ----------
    predictor : object
        Prototype conformal predictor.

    predictors : list
        List of underlying conformal predictors.

    sampler : object
        Sampler object used to generate training and calibration examples.

    agg_func : callable
        Function used to aggregate the predictions of the underlying
        conformal predictors.

    References
    ----------
    .. [1] Vovk, V. (2013). Cross-conformal predictors. Annals of Mathematics
        and Artificial Intelligence, 1-20.

    Examples
    --------
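    A minimal sketch (same assumptions as the ``AggregatedCp`` example
    above):

    >>> from sklearn.datasets import load_iris
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from nonconformist.base import ClassifierAdapter
    >>> from nonconformist.icp import IcpClassifier
    >>> from nonconformist.nc import ClassifierNc, MarginErrFunc
    >>> from nonconformist.acp import BootstrapConformalClassifier
    >>> iris = load_iris()
    >>> icp = IcpClassifier(
    ...     ClassifierNc(ClassifierAdapter(RandomForestClassifier()),
    ...                  MarginErrFunc())
    ... )
    >>> bcp = BootstrapConformalClassifier(icp, n_models=5)
    >>> bcp.fit(iris.data, iris.target)
    >>> bcp.predict(iris.data, significance=0.1)  # doctest: +SKIP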
    """

    def __init__(self, predictor, n_models=10):
        # Pass n_models by keyword: the third positional parameter of
        # AggregatedCp.__init__ is aggregation_func, not n_models.
        super(BootstrapConformalClassifier, self).__init__(
            predictor, BootstrapSampler(), n_models=n_models
        )

    def predict(self, x, significance=None):
        ncal_ngt_neq = np.stack(
            [p._get_stats(x) for p in self.predictors], axis=3
        )
        ncal_ngt_neq = ncal_ngt_neq.sum(axis=3)

        p = calc_p(
            ncal_ngt_neq[:, :, 0] + ncal_ngt_neq[:, :, 0] / self.n_train,
            ncal_ngt_neq[:, :, 1] + ncal_ngt_neq[:, :, 0] / self.n_train,
            ncal_ngt_neq[:, :, 2],
            smoothing=self.predictors[0].smoothing,
        )

        if significance:
            return p > significance
        else:
            return p
@@ -0,0 +1,156 @@
#!/usr/bin/env python

"""
Base classes and model adapters for underlying prediction models.
"""

# Authors: Henrik Linusson

import abc
import numpy as np

from sklearn.base import BaseEstimator


class RegressorMixin(object):
    def __init__(self):
        super(RegressorMixin, self).__init__()

    @classmethod
    def get_problem_type(cls):
        return "regression"


class ClassifierMixin(object):
    def __init__(self):
        super(ClassifierMixin, self).__init__()

    @classmethod
    def get_problem_type(cls):
        return "classification"


class BaseModelAdapter(BaseEstimator, metaclass=abc.ABCMeta):
    def __init__(self, model, fit_params=None):
        super(BaseModelAdapter, self).__init__()

        self.model = model
        self.last_x, self.last_y = None, None
        self.clean = False
        self.fit_params = {} if fit_params is None else fit_params

    def fit(self, x, y):
        """Fits the model.

        Parameters
        ----------
        x : numpy array of shape [n_samples, n_features]
            Inputs of examples for fitting the model.

        y : numpy array of shape [n_samples]
            Outputs of examples for fitting the model.

        Returns
        -------
        None
        """

        self.model.fit(x, y, **self.fit_params)
        self.clean = False

    def predict(self, x):
        """Returns the prediction made by the underlying model.

        Parameters
        ----------
        x : numpy array of shape [n_samples, n_features]
            Inputs of test examples.

        Returns
        -------
        y : numpy array of shape [n_samples]
            Predicted outputs of test examples.
        """
        # Cache the last prediction: recompute only when the model has been
        # refitted or the inputs differ from the previous call.
        if (
            not self.clean
            or self.last_x is None
            or self.last_y is None
            or not np.array_equal(self.last_x, x)
        ):
            self.last_x = x
            self.last_y = self._underlying_predict(x)
            self.clean = True

        return self.last_y.copy()

    @abc.abstractmethod
    def _underlying_predict(self, x):
        """Produces a prediction using the encapsulated model.

        Parameters
        ----------
        x : numpy array of shape [n_samples, n_features]
            Inputs of test examples.

        Returns
        -------
        y : numpy array of shape [n_samples]
            Predicted outputs of test examples.
        """
        pass


class ClassifierAdapter(BaseModelAdapter):
    def __init__(self, model, fit_params=None):
        super(ClassifierAdapter, self).__init__(model, fit_params)

    def _underlying_predict(self, x):
        return self.model.predict_proba(x)


class RegressorAdapter(BaseModelAdapter):
    def __init__(self, model, fit_params=None):
        super(RegressorAdapter, self).__init__(model, fit_params)

    def _underlying_predict(self, x):
        return self.model.predict(x)


class OobMixin(object):
    def __init__(self, model, fit_params=None):
        super(OobMixin, self).__init__(model, fit_params)
        self.train_x = None

    def fit(self, x, y):
        super(OobMixin, self).fit(x, y)
        self.train_x = x

    def _underlying_predict(self, x):
        # TODO: sub-sampling of ensemble for test patterns
        # Use the out-of-bag prediction when predicting on the training set
        # itself; checking shapes first avoids broadcasting errors when the
        # test set differs in size from the training set.
        oob = (
            self.train_x is not None
            and x.shape == self.train_x.shape
            and np.array_equal(x, self.train_x)
        )

        if oob:
            return self._oob_prediction()
        else:
            return super(OobMixin, self)._underlying_predict(x)


class OobClassifierAdapter(OobMixin, ClassifierAdapter):
    def __init__(self, model, fit_params=None):
        super(OobClassifierAdapter, self).__init__(model, fit_params)

    def _oob_prediction(self):
        return self.model.oob_decision_function_


class OobRegressorAdapter(OobMixin, RegressorAdapter):
    def __init__(self, model, fit_params=None):
        super(OobRegressorAdapter, self).__init__(model, fit_params)

    def _oob_prediction(self):
        return self.model.oob_prediction_
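

# A minimal usage sketch for the OOB adapters, kept as a comment to avoid
# import-time side effects; ``x_train``/``y_train`` are hypothetical arrays.
# It assumes a scikit-learn ensemble fitted with ``oob_score=True`` so that
# ``oob_decision_function_`` is populated:
#
#     from sklearn.ensemble import RandomForestClassifier
#     model = OobClassifierAdapter(
#         RandomForestClassifier(n_estimators=100, oob_score=True)
#     )
#     model.fit(x_train, y_train)     # also records x_train for the OOB check
#     probs = model.predict(x_train)  # returns oob_decision_function_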
@@ -0,0 +1,172 @@
import numpy as np

# Explicit imports for names used below; previously these were only
# available via the star import from .icp.
from .base import BaseEstimator, ClassifierMixin
from .icp import *
from .util import calc_p

# TODO: move contents from nonconformist.icp here


# -----------------------------------------------------------------------------
# TcpClassifier
# -----------------------------------------------------------------------------
class TcpClassifier(BaseEstimator, ClassifierMixin):
    """Transductive conformal classifier.

    Parameters
    ----------
    nc_function : BaseScorer
        Nonconformity scorer object used to calculate nonconformity of
        calibration examples and test patterns. Should implement ``fit(x, y)``
        and ``calc_nc(x, y)``.

    condition : callable or None
        Function used to partition examples for conditional conformal
        prediction. Defaults to a function returning 0 for every example
        (unconditional prediction).

    smoothing : boolean
        Decides whether to use stochastic smoothing of p-values.

    Attributes
    ----------
    train_x : numpy array of shape [n_train_examples, n_features]
        Inputs of training set.

    train_y : numpy array of shape [n_train_examples]
        Outputs of training set.

    nc_function : BaseScorer
        Nonconformity scorer object used to calculate nonconformity scores.

    classes : numpy array of shape [n_classes]
        List of class labels, with indices corresponding to output columns
        of TcpClassifier.predict()

    See also
    --------
    IcpClassifier

    References
    ----------
    .. [1] Vovk, V., Gammerman, A., & Shafer, G. (2005). Algorithmic learning
        in a random world. Springer Science & Business Media.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.svm import SVC
    >>> from nonconformist.base import ClassifierAdapter
    >>> from nonconformist.cp import TcpClassifier
    >>> from nonconformist.nc import ClassifierNc, MarginErrFunc
    >>> iris = load_iris()
    >>> idx = np.random.permutation(iris.target.size)
    >>> train = idx[:int(idx.size / 2)]
    >>> test = idx[int(idx.size / 2):]
    >>> model = ClassifierAdapter(SVC(probability=True))
    >>> nc = ClassifierNc(model, MarginErrFunc())
    >>> tcp = TcpClassifier(nc)
    >>> tcp.fit(iris.data[train, :], iris.target[train])
    >>> tcp.predict(iris.data[test, :], significance=0.10)
    ... # doctest: +SKIP
    array([[ True, False, False],
           [False, True, False],
           ...,
           [False, True, False],
           [False, True, False]], dtype=bool)
    """

    def __init__(self, nc_function, condition=None, smoothing=True):
        self.train_x, self.train_y = None, None
        self.nc_function = nc_function
        super(TcpClassifier, self).__init__()

        # Check if condition-parameter is the default function (i.e.,
        # lambda x: 0). This is so we can safely clone the object without
        # the clone accidentally having self.conditional = True.
        def default_condition(x):
            return 0

        is_default = callable(condition) and (
            condition.__code__.co_code == default_condition.__code__.co_code
        )

        if is_default:
            self.condition = condition
            self.conditional = False
        elif callable(condition):
            self.condition = condition
            self.conditional = True
        else:
            self.condition = lambda x: 0
            self.conditional = False

        self.smoothing = smoothing

        self.base_icp = IcpClassifier(
            self.nc_function, self.condition, self.smoothing
        )

        self.classes = None

    def fit(self, x, y):
        self.train_x, self.train_y = x, y
        self.classes = np.unique(y)

    def predict(self, x, significance=None):
        """Predict the output values for a set of input patterns.

        Parameters
        ----------
        x : numpy array of shape [n_samples, n_features]
            Inputs of patterns for which to predict output values.

        significance : float or None
            Significance level (maximum allowed error rate) of predictions.
            Should be a float between 0 and 1. If ``None``, then the p-values
            are output rather than the predictions.

        Returns
        -------
        p : numpy array of shape [n_samples, n_classes]
            If significance is ``None``, then p contains the p-values for each
            sample-class pair; if significance is a float between 0 and 1, then
            p is a boolean array denoting which labels are included in the
            prediction sets.
        """
        n_test = x.shape[0]
        n_train = self.train_x.shape[0]
        p = np.zeros((n_test, self.classes.size))
        for i in range(n_test):
            for j, y in enumerate(self.classes):
                # Tentatively label the test pattern with class y, refit on
                # the extended training set, and score the nonconformity of
                # the augmented example against the training examples.
                train_x = np.vstack([self.train_x, x[i, :]])
                train_y = np.hstack([self.train_y, y])
                self.base_icp.fit(train_x, train_y)
                scores = self.base_icp.nc_function.score(train_x, train_y)
                ngt = (scores[:-1] > scores[-1]).sum()
                neq = (scores[:-1] == scores[-1]).sum()

                p[i, j] = calc_p(n_train, ngt, neq, self.smoothing)

        if significance is not None:
            return p > significance
        else:
            return p

    def predict_conf(self, x):
        """Predict the output values for a set of input patterns, using
        the confidence-and-credibility output scheme.

        Parameters
        ----------
        x : numpy array of shape [n_samples, n_features]
            Inputs of patterns for which to predict output values.

        Returns
        -------
        p : numpy array of shape [n_samples, 3]
            p contains three columns: the first column contains the most
            likely class for each test pattern; the second column contains
            the confidence in the predicted class label, and the third column
            contains the credibility of the prediction.
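
        A minimal sketch (continuing the doctest from the class docstring
        above):

        >>> tcp.predict_conf(iris.data[test, :])  # doctest: +SKIP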
        """
        p = self.predict(x, significance=None)
        label = p.argmax(axis=1)
        credibility = p.max(axis=1)
        # Mask out each pattern's top p-value so that confidence is computed
        # from the second-largest p-value.
        for i, idx in enumerate(label):
            p[i, idx] = -np.inf
        confidence = 1 - p.max(axis=1)

        return np.array([label, confidence, credibility]).T