unifiedbooster 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,442 @@
+ #!/usr/bin/env python
+
+ """
+ Inductive conformal predictors.
+ """
+
+ # Authors: Henrik Linusson
+
+ from __future__ import division
+ from collections import defaultdict
+ from functools import partial
+
+ import numpy as np
+ from sklearn.base import BaseEstimator
+
+ from .base import RegressorMixin, ClassifierMixin
+ from .util import calc_p
+
+
+ # -----------------------------------------------------------------------------
+ # Base inductive conformal predictor
+ # -----------------------------------------------------------------------------
+ class BaseIcp(BaseEstimator):
+     """Base class for inductive conformal predictors."""
+
+     def __init__(self, nc_function, condition=None):
+         self.cal_x, self.cal_y = None, None
+         self.nc_function = nc_function
+
+         # Check whether the condition parameter is the default function
+         # (i.e., lambda x: 0), so that the object can be cloned safely
+         # without the clone accidentally having self.conditional = True.
+         def default_condition(x):
+             return 0
+
+         is_default = callable(condition) and (
+             condition.__code__.co_code == default_condition.__code__.co_code
+         )
+
+         if is_default:
+             self.condition = condition
+             self.conditional = False
+         elif callable(condition):
+             self.condition = condition
+             self.conditional = True
+         else:
+             self.condition = lambda x: 0
+             self.conditional = False
+
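+     # Example: any callable other than the default ``lambda x: 0`` enables
+     # conditional (Mondrian) conformal prediction. Since ``calibrate`` calls
+     # ``self.condition((x[i, :], y[i]))``, a label-conditional taxonomy can
+     # be expressed as:
+     #
+     #     IcpClassifier(nc, condition=lambda instance: instance[1])
+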
+     def fit(self, x, y):
+         """Fit underlying nonconformity scorer.
+
+         Parameters
+         ----------
+         x : numpy array of shape [n_samples, n_features]
+             Inputs of examples for fitting the nonconformity scorer.
+
+         y : numpy array of shape [n_samples]
+             Outputs of examples for fitting the nonconformity scorer.
+
+         Returns
+         -------
+         None
+         """
+         # TODO: incremental?
+         self.nc_function.fit(x, y)
+
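+     # Usage sketch (hypothetical variable names): the inductive conformal
+     # protocol fits the scorer on a proper training set and calibrates on a
+     # disjoint held-out set, e.g.
+     #
+     #     icp.fit(x_train, y_train)
+     #     icp.calibrate(x_cal, y_cal)
+     #     icp.predict(x_test, significance=0.05)
+     #
+     # Reusing training data for calibration would invalidate the coverage
+     # guarantee (see OobCpClassifier/OobCpRegressor below for the
+     # out-of-bag exception).
+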
+     def calibrate(self, x, y, increment=False):
+         """Calibrate conformal predictor based on underlying nonconformity
+         scorer.
+
+         Parameters
+         ----------
+         x : numpy array of shape [n_samples, n_features]
+             Inputs of examples for calibrating the conformal predictor.
+
+         y : numpy array of shape [n_samples]
+             Outputs of examples for calibrating the conformal predictor.
+
+         increment : boolean
+             If ``True``, performs an incremental recalibration of the
+             conformal predictor. The supplied ``x`` and ``y`` are added to
+             the set of previously existing calibration examples, and the
+             conformal predictor is then calibrated on both the old and new
+             calibration examples.
+
+         Returns
+         -------
+         None
+         """
+         self._calibrate_hook(x, y, increment)
+         self._update_calibration_set(x, y, increment)
+
+         if self.conditional:
+             category_map = np.array(
+                 [self.condition((x[i, :], y[i])) for i in range(y.size)]
+             )
+             self.categories = np.unique(category_map)
+             self.cal_scores = defaultdict(partial(np.ndarray, 0))
+
+             for cond in self.categories:
+                 idx = category_map == cond
+                 cal_scores = self.nc_function.score(
+                     self.cal_x[idx, :], self.cal_y[idx]
+                 )
+                 self.cal_scores[cond] = np.sort(cal_scores, 0)[::-1]
+         else:
+             self.categories = np.array([0])
+             cal_scores = self.nc_function.score(self.cal_x, self.cal_y)
+             self.cal_scores = {0: np.sort(cal_scores, 0)[::-1]}
+
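+     # Note: calibration scores are stored in descending order; consumers
+     # such as ``IcpClassifier._get_stats`` reverse them back to ascending
+     # order before applying ``np.searchsorted``.
+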
+     def _calibrate_hook(self, x, y, increment):
+         pass
+
+     def _update_calibration_set(self, x, y, increment):
+         if increment and self.cal_x is not None and self.cal_y is not None:
+             self.cal_x = np.vstack([self.cal_x, x])
+             self.cal_y = np.hstack([self.cal_y, y])
+         else:
+             self.cal_x, self.cal_y = x, y
+
+
+ # -----------------------------------------------------------------------------
+ # Inductive conformal classifier
+ # -----------------------------------------------------------------------------
+ class IcpClassifier(BaseIcp, ClassifierMixin):
+     """Inductive conformal classifier.
+
+     Parameters
+     ----------
+     nc_function : BaseScorer
+         Nonconformity scorer object used to calculate nonconformity of
+         calibration examples and test patterns. Should implement ``fit(x, y)``
+         and ``calc_nc(x, y)``.
+
+     smoothing : boolean
+         Decides whether to use stochastic smoothing of p-values.
+
+     Attributes
+     ----------
+     cal_x : numpy array of shape [n_cal_examples, n_features]
+         Inputs of calibration set.
+
+     cal_y : numpy array of shape [n_cal_examples]
+         Outputs of calibration set.
+
+     nc_function : BaseScorer
+         Nonconformity scorer object used to calculate nonconformity scores.
+
+     classes : numpy array of shape [n_classes]
+         List of class labels, with indices corresponding to output columns
+         of ``IcpClassifier.predict()``.
+
+     See also
+     --------
+     IcpRegressor
+
+     References
+     ----------
+     .. [1] Papadopoulos, H., & Haralambous, H. (2011). Reliable prediction
+         intervals with regression neural networks. Neural Networks, 24(8),
+         842-851.
+
+     Examples
+     --------
+     >>> import numpy as np
+     >>> from sklearn.datasets import load_iris
+     >>> from sklearn.tree import DecisionTreeClassifier
+     >>> from nonconformist.base import ClassifierAdapter
+     >>> from nonconformist.icp import IcpClassifier
+     >>> from nonconformist.nc import ClassifierNc, MarginErrFunc
+     >>> iris = load_iris()
+     >>> idx = np.random.permutation(iris.target.size)
+     >>> train = idx[:int(idx.size / 3)]
+     >>> cal = idx[int(idx.size / 3):int(2 * idx.size / 3)]
+     >>> test = idx[int(2 * idx.size / 3):]
+     >>> model = ClassifierAdapter(DecisionTreeClassifier())
+     >>> nc = ClassifierNc(model, MarginErrFunc())
+     >>> icp = IcpClassifier(nc)
+     >>> icp.fit(iris.data[train, :], iris.target[train])
+     >>> icp.calibrate(iris.data[cal, :], iris.target[cal])
+     >>> icp.predict(iris.data[test, :], significance=0.10)
+     ... # doctest: +SKIP
+     array([[ True, False, False],
+            [False,  True, False],
+            ...,
+            [False,  True, False],
+            [False,  True, False]], dtype=bool)
+     """
+
191
+ def __init__(self, nc_function, condition=None, smoothing=True):
192
+ super(IcpClassifier, self).__init__(nc_function, condition)
193
+ self.classes = None
194
+ self.smoothing = smoothing
195
+
196
+ def _calibrate_hook(self, x, y, increment=False):
197
+ self._update_classes(y, increment)
198
+
199
+ def _update_classes(self, y, increment):
200
+ if self.classes is None or not increment:
201
+ self.classes = np.unique(y)
202
+ else:
203
+ self.classes = np.unique(np.hstack([self.classes, y]))
204
+
205
+     def predict(self, x, significance=None):
+         """Predict the output values for a set of input patterns.
+
+         Parameters
+         ----------
+         x : numpy array of shape [n_samples, n_features]
+             Inputs of patterns for which to predict output values.
+
+         significance : float or None
+             Significance level (maximum allowed error rate) of predictions.
+             Should be a float between 0 and 1. If ``None``, then the p-values
+             are output rather than the predictions.
+
+         Returns
+         -------
+         p : numpy array of shape [n_samples, n_classes]
+             If significance is ``None``, then p contains the p-values for each
+             sample-class pair; if significance is a float between 0 and 1, then
+             p is a boolean array denoting which labels are included in the
+             prediction sets.
+         """
+         # TODO: if x == self.last_x ...
+         n_test_objects = x.shape[0]
+         p = np.zeros((n_test_objects, self.classes.size))
+
+         ncal_ngt_neq = self._get_stats(x)
+
+         for i in range(len(self.classes)):
+             for j in range(n_test_objects):
+                 p[j, i] = calc_p(
+                     ncal_ngt_neq[j, i, 0],
+                     ncal_ngt_neq[j, i, 1],
+                     ncal_ngt_neq[j, i, 2],
+                     self.smoothing,
+                 )
+
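+         # calc_p is assumed to implement the standard conformal p-value
+         # (as in the reference nonconformist implementation):
+         #
+         #     smoothed:   p = (n_gt + theta * (n_eq + 1)) / (n_cal + 1),  theta ~ U(0, 1)
+         #     unsmoothed: p = (n_gt + n_eq + 1) / (n_cal + 1)
+         #
+         # where n_cal, n_gt and n_eq are the three statistics returned by
+         # _get_stats for each (sample, class) pair.
+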
+         if significance is not None:
+             return p > significance
+         else:
+             return p
+
+     def _get_stats(self, x):
+         n_test_objects = x.shape[0]
+         ncal_ngt_neq = np.zeros((n_test_objects, self.classes.size, 3))
+         for i, c in enumerate(self.classes):
+             test_class = np.zeros(x.shape[0], dtype=self.classes.dtype)
+             test_class.fill(c)
+
+             # TODO: maybe calculate p-values using cython or similar
+             # TODO: interpolated p-values
+
+             # TODO: nc_function.calc_nc should take X * {y1, y2, ... ,yn}
+             test_nc_scores = self.nc_function.score(x, test_class)
+             for j, nc in enumerate(test_nc_scores):
+                 cal_scores = self.cal_scores[self.condition((x[j, :], c))][::-1]
+                 n_cal = cal_scores.size
+
+                 idx_left = np.searchsorted(cal_scores, nc, "left")
+                 idx_right = np.searchsorted(cal_scores, nc, "right")
+
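+                 # cal_scores is ascending here, so ``idx_right`` counts
+                 # scores <= nc: ``n_cal - idx_right`` is the number of
+                 # calibration scores strictly greater than nc, and
+                 # ``idx_right - idx_left`` the number of exact ties.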
+                 ncal_ngt_neq[j, i, 0] = n_cal
+                 ncal_ngt_neq[j, i, 1] = n_cal - idx_right
+                 ncal_ngt_neq[j, i, 2] = idx_right - idx_left
+
+         return ncal_ngt_neq
+
+     def predict_conf(self, x):
+         """Predict the output values for a set of input patterns, using
+         the confidence-and-credibility output scheme.
+
+         Parameters
+         ----------
+         x : numpy array of shape [n_samples, n_features]
+             Inputs of patterns for which to predict output values.
+
+         Returns
+         -------
+         p : numpy array of shape [n_samples, 3]
+             p contains three columns: the first column contains the most
+             likely class for each test pattern; the second column contains
+             the confidence in the predicted class label, and the third column
+             contains the credibility of the prediction.
+         """
+         p = self.predict(x, significance=None)
+         label = p.argmax(axis=1)
+         credibility = p.max(axis=1)
+         for i, idx in enumerate(label):
+             p[i, idx] = -np.inf
+         confidence = 1 - p.max(axis=1)
+
+         return np.array([label, confidence, credibility]).T
+
+
+ # -----------------------------------------------------------------------------
+ # Inductive conformal regressor
+ # -----------------------------------------------------------------------------
+ class IcpRegressor(BaseIcp, RegressorMixin):
+     """Inductive conformal regressor.
+
+     Parameters
+     ----------
+     nc_function : BaseScorer
+         Nonconformity scorer object used to calculate nonconformity of
+         calibration examples and test patterns. Should implement ``fit(x, y)``,
+         ``calc_nc(x, y)`` and ``predict(x, nc_scores, significance)``.
+
+     Attributes
+     ----------
+     cal_x : numpy array of shape [n_cal_examples, n_features]
+         Inputs of calibration set.
+
+     cal_y : numpy array of shape [n_cal_examples]
+         Outputs of calibration set.
+
+     nc_function : BaseScorer
+         Nonconformity scorer object used to calculate nonconformity scores.
+
+     See also
+     --------
+     IcpClassifier
+
+     References
+     ----------
+     .. [1] Papadopoulos, H., Proedrou, K., Vovk, V., & Gammerman, A. (2002).
+         Inductive confidence machines for regression. In Machine Learning: ECML
+         2002 (pp. 345-356). Springer Berlin Heidelberg.
+
+     .. [2] Papadopoulos, H., & Haralambous, H. (2011). Reliable prediction
+         intervals with regression neural networks. Neural Networks, 24(8),
+         842-851.
+
+     Examples
+     --------
+     >>> import numpy as np
+     >>> # Note: load_boston was removed in scikit-learn 1.2; on newer
+     >>> # versions, substitute another regression dataset.
+     >>> from sklearn.datasets import load_boston
+     >>> from sklearn.tree import DecisionTreeRegressor
+     >>> from nonconformist.base import RegressorAdapter
+     >>> from nonconformist.icp import IcpRegressor
+     >>> from nonconformist.nc import RegressorNc, AbsErrorErrFunc
+     >>> boston = load_boston()
+     >>> idx = np.random.permutation(boston.target.size)
+     >>> train = idx[:int(idx.size / 3)]
+     >>> cal = idx[int(idx.size / 3):int(2 * idx.size / 3)]
+     >>> test = idx[int(2 * idx.size / 3):]
+     >>> model = RegressorAdapter(DecisionTreeRegressor())
+     >>> nc = RegressorNc(model, AbsErrorErrFunc())
+     >>> icp = IcpRegressor(nc)
+     >>> icp.fit(boston.data[train, :], boston.target[train])
+     >>> icp.calibrate(boston.data[cal, :], boston.target[cal])
+     >>> icp.predict(boston.data[test, :], significance=0.10)
+     ... # doctest: +SKIP
+     array([[  5. ,  20.6],
+            [ 15.5,  31.1],
+            ...,
+            [ 14.2,  29.8],
+            [ 11.6,  27.2]])
+     """
+
+     def __init__(self, nc_function, condition=None):
+         super(IcpRegressor, self).__init__(nc_function, condition)
+
+     def predict(self, x, significance=None):
+         """Predict the output values for a set of input patterns.
+
+         Parameters
+         ----------
+         x : numpy array of shape [n_samples, n_features]
+             Inputs of patterns for which to predict output values.
+
+         significance : float or None
+             Significance level (maximum allowed error rate) of predictions.
+             Should be a float between 0 and 1. If ``None``, then intervals for
+             all significance levels (0.01, 0.02, ..., 0.99) are output in a
+             3d-matrix.
+
+         Returns
+         -------
+         p : numpy array of shape [n_samples, 2] or [n_samples, 2, 99]
+             If significance is ``None``, then p contains the interval (minimum
+             and maximum boundaries) for each test pattern, and each significance
+             level (0.01, 0.02, ..., 0.99). If significance is a float between
+             0 and 1, then p contains the prediction intervals (minimum and
+             maximum boundaries) for the set of test patterns at the chosen
+             significance level.
+         """
+         # TODO: interpolated p-values
+
+         n_significance = (
+             99 if significance is None else np.array(significance).size
+         )
+
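+         # When ``significance`` is None, ``nc_function.predict`` is
+         # presumably expected to return one interval per test pattern for
+         # each of the 99 default levels (0.01, ..., 0.99), matching the
+         # [n_samples, 2, 99] array allocated below.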
+         if n_significance > 1:
+             prediction = np.zeros((x.shape[0], 2, n_significance))
+         else:
+             prediction = np.zeros((x.shape[0], 2))
+
+         condition_map = np.array(
+             [self.condition((x[i, :], None)) for i in range(x.shape[0])]
+         )
+
+         for condition in self.categories:
+             idx = condition_map == condition
+             if np.sum(idx) > 0:
+                 p = self.nc_function.predict(
+                     x[idx, :], self.cal_scores[condition], significance
+                 )
+                 if n_significance > 1:
+                     prediction[idx, :, :] = p
+                 else:
+                     prediction[idx, :] = p
+
+         return prediction
+
+
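+ # -----------------------------------------------------------------------------
+ # Out-of-bag conformal predictors
+ # -----------------------------------------------------------------------------
+ # The classes below fit and calibrate on the same data in a single ``fit``
+ # call. This is presumably only sound when the nonconformity scorer produces
+ # out-of-bag predictions (e.g. a bagged ensemble scoring each training
+ # example only with members that did not train on it), which keeps the
+ # calibration scores approximately exchangeable with test scores.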
+ class OobCpClassifier(IcpClassifier):
+     def __init__(self, nc_function, condition=None, smoothing=True):
+         super(OobCpClassifier, self).__init__(nc_function, condition, smoothing)
+
+     def fit(self, x, y):
+         super(OobCpClassifier, self).fit(x, y)
+         super(OobCpClassifier, self).calibrate(x, y, False)
+
+     def calibrate(self, x, y, increment=False):
+         # Deliberately a no-op: fit() already calibrates on the training
+         # data, so separate calibration is not supported for OOB predictors
+         # (arguably this should raise NotImplementedError instead).
+         pass
+
+
+ class OobCpRegressor(IcpRegressor):
+     def __init__(self, nc_function, condition=None):
+         super(OobCpRegressor, self).__init__(nc_function, condition)
+
+     def fit(self, x, y):
+         super(OobCpRegressor, self).fit(x, y)
+         super(OobCpRegressor, self).calibrate(x, y, False)
+
+     def calibrate(self, x, y, increment=False):
+         # Deliberately a no-op: fit() already calibrates on the training
+         # data, so separate calibration is not supported for OOB predictors
+         # (arguably this should raise NotImplementedError instead).
+         pass