unifiedbooster-0.5.0-py3-none-any.whl → unifiedbooster-0.6.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unifiedbooster/gpoptimization.py +115 -57
- unifiedbooster/nonconformist/__init__.py +30 -0
- unifiedbooster/nonconformist/acp.py +381 -0
- unifiedbooster/nonconformist/base.py +156 -0
- unifiedbooster/nonconformist/cp.py +172 -0
- unifiedbooster/nonconformist/evaluation.py +486 -0
- unifiedbooster/nonconformist/icp.py +442 -0
- unifiedbooster/nonconformist/nc.py +610 -0
- unifiedbooster/nonconformist/util.py +9 -0
- {unifiedbooster-0.5.0.dist-info → unifiedbooster-0.6.0.dist-info}/METADATA +1 -1
- unifiedbooster-0.6.0.dist-info/RECORD +19 -0
- unifiedbooster-0.5.0.dist-info/RECORD +0 -11
- {unifiedbooster-0.5.0.dist-info → unifiedbooster-0.6.0.dist-info}/LICENSE +0 -0
- {unifiedbooster-0.5.0.dist-info → unifiedbooster-0.6.0.dist-info}/WHEEL +0 -0
- {unifiedbooster-0.5.0.dist-info → unifiedbooster-0.6.0.dist-info}/entry_points.txt +0 -0
- {unifiedbooster-0.5.0.dist-info → unifiedbooster-0.6.0.dist-info}/top_level.txt +0 -0
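Judging from the file list, 0.6.0 vendors Henrik Linusson's nonconformist conformal-prediction code as a new `unifiedbooster.nonconformist` subpackage; the hunks below show `base.py` and `cp.py` in full. A minimal sketch of the import paths this implies, assuming the wheel installs the files exactly where the RECORD places them (not confirmed by this diff):

```python
# Illustrative only, not part of the diff: import paths inferred from the file
# list above; class names are taken from the new files' contents shown below.
from unifiedbooster.nonconformist.base import ClassifierAdapter, RegressorAdapter
from unifiedbooster.nonconformist.cp import TcpClassifier
# cp.py star-imports from .icp, so IcpClassifier presumably lives there.
from unifiedbooster.nonconformist.icp import IcpClassifier
```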
unifiedbooster/nonconformist/base.py

@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+
+"""
+docstring
+"""
+
+# Authors: Henrik Linusson
+
+import abc
+import numpy as np
+
+from sklearn.base import BaseEstimator
+
+
+class RegressorMixin(object):
+    def __init__(self):
+        super(RegressorMixin, self).__init__()
+
+    @classmethod
+    def get_problem_type(cls):
+        return "regression"
+
+
+class ClassifierMixin(object):
+    def __init__(self):
+        super(ClassifierMixin, self).__init__()
+
+    @classmethod
+    def get_problem_type(cls):
+        return "classification"
+
+
+class BaseModelAdapter(BaseEstimator):
+    __metaclass__ = abc.ABCMeta
+
+    def __init__(self, model, fit_params=None):
+        super(BaseModelAdapter, self).__init__()
+
+        self.model = model
+        self.last_x, self.last_y = None, None
+        self.clean = False
+        self.fit_params = {} if fit_params is None else fit_params
+
+    def fit(self, x, y):
+        """Fits the model.
+
+        Parameters
+        ----------
+        x : numpy array of shape [n_samples, n_features]
+            Inputs of examples for fitting the model.
+
+        y : numpy array of shape [n_samples]
+            Outputs of examples for fitting the model.
+
+        Returns
+        -------
+        None
+        """
+
+        self.model.fit(x, y, **self.fit_params)
+        self.clean = False
+
+    def predict(self, x):
+        """Returns the prediction made by the underlying model.
+
+        Parameters
+        ----------
+        x : numpy array of shape [n_samples, n_features]
+            Inputs of test examples.
+
+        Returns
+        -------
+        y : numpy array of shape [n_samples]
+            Predicted outputs of test examples.
+        """
+        if (
+            not self.clean
+            or self.last_x is None
+            or self.last_y is None
+            or not np.array_equal(self.last_x, x)
+        ):
+            self.last_x = x
+            self.last_y = self._underlying_predict(x)
+            self.clean = True
+
+        return self.last_y.copy()
+
+    @abc.abstractmethod
+    def _underlying_predict(self, x):
+        """Produces a prediction using the encapsulated model.
+
+        Parameters
+        ----------
+        x : numpy array of shape [n_samples, n_features]
+            Inputs of test examples.
+
+        Returns
+        -------
+        y : numpy array of shape [n_samples]
+            Predicted outputs of test examples.
+        """
+        pass
+
+
+class ClassifierAdapter(BaseModelAdapter):
+    def __init__(self, model, fit_params=None):
+        super(ClassifierAdapter, self).__init__(model, fit_params)
+
+    def _underlying_predict(self, x):
+        return self.model.predict_proba(x)
+
+
+class RegressorAdapter(BaseModelAdapter):
+    def __init__(self, model, fit_params=None):
+        super(RegressorAdapter, self).__init__(model, fit_params)
+
+    def _underlying_predict(self, x):
+        return self.model.predict(x)
+
+
+class OobMixin(object):
+    def __init__(self, model, fit_params=None):
+        super(OobMixin, self).__init__(model, fit_params)
+        self.train_x = None
+
+    def fit(self, x, y):
+        super(OobMixin, self).fit(x, y)
+        self.train_x = x
+
+    def _underlying_predict(self, x):
+        # TODO: sub-sampling of ensemble for test patterns
+        oob = x == self.train_x
+
+        if hasattr(oob, "all"):
+            oob = oob.all()
+
+        if oob:
+            return self._oob_prediction()
+        else:
+            return super(OobMixin, self)._underlying_predict(x)
+
+
+class OobClassifierAdapter(OobMixin, ClassifierAdapter):
+    def __init__(self, model, fit_params=None):
+        super(OobClassifierAdapter, self).__init__(model, fit_params)
+
+    def _oob_prediction(self):
+        return self.model.oob_decision_function_
+
+
+class OobRegressorAdapter(OobMixin, RegressorAdapter):
+    def __init__(self, model, fit_params=None):
+        super(OobRegressorAdapter, self).__init__(model, fit_params)
+
+    def _oob_prediction(self):
+        return self.model.oob_prediction_
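The adapters above wrap a scikit-learn style estimator behind a single `predict` interface (class probabilities for classifiers, point predictions for regressors) and cache the last prediction; the `Oob*` variants return the wrapped model's out-of-bag estimates when queried on the training inputs. A minimal usage sketch, assuming the vendored import path from the file list and scikit-learn's random forests (illustrative, not part of the diff):

```python
# Illustrative sketch only, not part of the diff.
import numpy as np
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

from unifiedbooster.nonconformist.base import ClassifierAdapter, OobRegressorAdapter

rng = np.random.default_rng(0)
X = rng.random((100, 4))
y_class = rng.integers(0, 2, 100)
y_reg = rng.random(100)

# ClassifierAdapter delegates to predict_proba, so predict() returns class probabilities.
clf = ClassifierAdapter(RandomForestClassifier(n_estimators=50, random_state=0))
clf.fit(X, y_class)
probs = clf.predict(X)  # shape (100, 2); a second call on the same X reuses the cached result

# OobRegressorAdapter returns oob_prediction_ when the inputs equal the training set,
# so the wrapped model must be fitted with oob_score=True.
reg = OobRegressorAdapter(RandomForestRegressor(n_estimators=50, oob_score=True, random_state=0))
reg.fit(X, y_reg)
oob_preds = reg.predict(X)  # out-of-bag predictions for the training inputs
```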
unifiedbooster/nonconformist/cp.py

@@ -0,0 +1,172 @@
+from .icp import *
+
+# TODO: move contents from nonconformist.icp here
+
+
+# -----------------------------------------------------------------------------
+# TcpClassifier
+# -----------------------------------------------------------------------------
+class TcpClassifier(BaseEstimator, ClassifierMixin):
+    """Transductive conformal classifier.
+
+    Parameters
+    ----------
+    nc_function : BaseScorer
+        Nonconformity scorer object used to calculate nonconformity of
+        calibration examples and test patterns. Should implement ``fit(x, y)``
+        and ``calc_nc(x, y)``.
+
+    smoothing : boolean
+        Decides whether to use stochastic smoothing of p-values.
+
+    Attributes
+    ----------
+    train_x : numpy array of shape [n_cal_examples, n_features]
+        Inputs of training set.
+
+    train_y : numpy array of shape [n_cal_examples]
+        Outputs of calibration set.
+
+    nc_function : BaseScorer
+        Nonconformity scorer object used to calculate nonconformity scores.
+
+    classes : numpy array of shape [n_classes]
+        List of class labels, with indices corresponding to output columns
+        of TcpClassifier.predict()
+
+    See also
+    --------
+    IcpClassifier
+
+    References
+    ----------
+    .. [1] Vovk, V., Gammerman, A., & Shafer, G. (2005). Algorithmic learning
+        in a random world. Springer Science & Business Media.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn.datasets import load_iris
+    >>> from sklearn.svm import SVC
+    >>> from nonconformist.base import ClassifierAdapter
+    >>> from nonconformist.cp import TcpClassifier
+    >>> from nonconformist.nc import ClassifierNc, MarginErrFunc
+    >>> iris = load_iris()
+    >>> idx = np.random.permutation(iris.target.size)
+    >>> train = idx[:int(idx.size / 2)]
+    >>> test = idx[int(idx.size / 2):]
+    >>> model = ClassifierAdapter(SVC(probability=True))
+    >>> nc = ClassifierNc(model, MarginErrFunc())
+    >>> tcp = TcpClassifier(nc)
+    >>> tcp.fit(iris.data[train, :], iris.target[train])
+    >>> tcp.predict(iris.data[test, :], significance=0.10)
+    ...     # doctest: +SKIP
+    array([[ True, False, False],
+           [False,  True, False],
+           ...,
+           [False,  True, False],
+           [False,  True, False]], dtype=bool)
+    """
+
+    def __init__(self, nc_function, condition=None, smoothing=True):
+        self.train_x, self.train_y = None, None
+        self.nc_function = nc_function
+        super(TcpClassifier, self).__init__()
+
+        # Check if condition-parameter is the default function (i.e.,
+        # lambda x: 0). This is so we can safely clone the object without
+        # the clone accidentally having self.conditional = True.
+        def default_condition(x):
+            return 0
+
+        is_default = callable(condition) and (
+            condition.__code__.co_code == default_condition.__code__.co_code
+        )
+
+        if is_default:
+            self.condition = condition
+            self.conditional = False
+        elif callable(condition):
+            self.condition = condition
+            self.conditional = True
+        else:
+            self.condition = lambda x: 0
+            self.conditional = False
+
+        self.smoothing = smoothing
+
+        self.base_icp = IcpClassifier(
+            self.nc_function, self.condition, self.smoothing
+        )
+
+        self.classes = None
+
+    def fit(self, x, y):
+        self.train_x, self.train_y = x, y
+        self.classes = np.unique(y)
+
+    def predict(self, x, significance=None):
+        """Predict the output values for a set of input patterns.
+
+        Parameters
+        ----------
+        x : numpy array of shape [n_samples, n_features]
+            Inputs of patterns for which to predict output values.
+
+        significance : float or None
+            Significance level (maximum allowed error rate) of predictions.
+            Should be a float between 0 and 1. If ``None``, then the p-values
+            are output rather than the predictions.
+
+        Returns
+        -------
+        p : numpy array of shape [n_samples, n_classes]
+            If significance is ``None``, then p contains the p-values for each
+            sample-class pair; if significance is a float between 0 and 1, then
+            p is a boolean array denoting which labels are included in the
+            prediction sets.
+        """
+        n_test = x.shape[0]
+        n_train = self.train_x.shape[0]
+        p = np.zeros((n_test, self.classes.size))
+        for i in range(n_test):
+            for j, y in enumerate(self.classes):
+                train_x = np.vstack([self.train_x, x[i, :]])
+                train_y = np.hstack([self.train_y, y])
+                self.base_icp.fit(train_x, train_y)
+                scores = self.base_icp.nc_function.score(train_x, train_y)
+                ngt = (scores[:-1] > scores[-1]).sum()
+                neq = (scores[:-1] == scores[-1]).sum()
+
+                p[i, j] = calc_p(n_train, ngt, neq, self.smoothing)
+
+        if significance is not None:
+            return p > significance
+        else:
+            return p
+
+    def predict_conf(self, x):
+        """Predict the output values for a set of input patterns, using
+        the confidence-and-credibility output scheme.
+
+        Parameters
+        ----------
+        x : numpy array of shape [n_samples, n_features]
+            Inputs of patterns for which to predict output values.
+
+        Returns
+        -------
+        p : numpy array of shape [n_samples, 3]
+            p contains three columns: the first column contains the most
+            likely class for each test pattern; the second column contains
+            the confidence in the predicted class label, and the third column
+            contains the credibility of the prediction.
+        """
+        p = self.predict(x, significance=None)
+        label = p.argmax(axis=1)
+        credibility = p.max(axis=1)
+        for i, idx in enumerate(label):
+            p[i, idx] = -np.inf
+        confidence = 1 - p.max(axis=1)

+        return np.array([label, confidence, credibility]).T
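For each test pattern, `TcpClassifier.predict` refits the underlying `IcpClassifier` on the training set augmented with the candidate (pattern, label) pair and turns the resulting nonconformity ranks into one p-value per class; `predict_conf` then reduces the p-values to a point prediction plus confidence (one minus the second-largest p-value) and credibility (the largest p-value). A usage sketch mirroring the docstring example above, with imports adjusted to the vendored paths (assumed, not confirmed by this diff); `ClassifierNc` and `MarginErrFunc` are expected to come from the new `nc.py`:

```python
# Illustrative sketch based on the TcpClassifier docstring example; the vendored
# import paths and the contents of nc.py are assumptions, not shown in this hunk.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.svm import SVC

from unifiedbooster.nonconformist.base import ClassifierAdapter
from unifiedbooster.nonconformist.cp import TcpClassifier
from unifiedbooster.nonconformist.nc import ClassifierNc, MarginErrFunc

iris = load_iris()
idx = np.random.permutation(iris.target.size)
train, test = idx[: idx.size // 2], idx[idx.size // 2 :]

nc = ClassifierNc(ClassifierAdapter(SVC(probability=True)), MarginErrFunc())
tcp = TcpClassifier(nc)
tcp.fit(iris.data[train, :], iris.target[train])

# Boolean prediction sets at the 10% significance level ...
sets_90 = tcp.predict(iris.data[test, :], significance=0.10)

# ... or per-pattern (label, confidence, credibility) triples.
label_conf_cred = tcp.predict_conf(iris.data[test, :])
```

Note that the transductive scheme refits the model once per test pattern and candidate class, so it is considerably more expensive than the inductive `IcpClassifier` added in `icp.py`.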