unifiedbooster 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,314 @@
1
+ from locale import normalize
2
+ import numpy as np
3
+ import pickle
4
+ from collections import namedtuple
5
+ from sklearn.base import BaseEstimator, RegressorMixin
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.ensemble import ExtraTreesRegressor
8
+ from sklearn.preprocessing import StandardScaler
9
+ from sklearn.neighbors import KernelDensity
10
+ from sklearn.model_selection import GridSearchCV
11
+ from scipy.stats import gaussian_kde
12
+ from tqdm import tqdm
13
+ from ..nonconformist import IcpRegressor
14
+ from ..nonconformist import RegressorNc
15
+ from ..nonconformist import RegressorNormalizer, AbsErrorErrFunc
16
+
17
+
18
class PredictionInterval(BaseEstimator, RegressorMixin):
    """Class PredictionInterval: Obtain prediction intervals.

    Attributes:

        obj: an object;
            object containing methods `fit` and `predict`

        method: a string;
            method for constructing the prediction intervals.
            Currently "splitconformal" (default) and "localconformal"

        level: a float;
            Confidence level for prediction intervals, in percent.
            Default is 95, equivalent to a miscoverage error of 5 (%)

        type_pi: a string;
            type of simulated prediction interval, only used when
            `replications` is not None: "bootstrap" (default) or "kde"

        type_split: a string;
            "random" (random split of data) or "sequential"
            (sequential split of data)

        replications: an integer;
            Number of replications for simulated conformal
            (default is `None`, i.e. no simulation)

        kernel: stored as given; not read by any method of this class

        agg: stored as given; not read by any method of this class

        seed: an integer;
            Reproducibility of fit (there's a random split between
            fitting and calibration data) and of the simulations
    """

    def __init__(
        self,
        obj,
        method="splitconformal",
        level=95,
        type_pi="bootstrap",
        type_split="random",
        replications=None,
        kernel=None,
        agg="mean",
        seed=123,
    ):

        self.obj = obj
        self.method = method
        self.level = level
        self.type_pi = type_pi
        self.type_split = type_split
        self.replications = replications
        self.kernel = kernel
        self.agg = agg
        self.seed = seed
        # Miscoverage rate as a fraction (0.05 for level=95).
        self.alpha_ = 1 - self.level / 100
        self.quantile_ = None
        self.icp_ = None
        self.calibrated_residuals_ = None
        self.scaled_calibrated_residuals_ = None
        self.calibrated_residuals_scaler_ = None
        self.kde_ = None

    def fit(self, X, y):
        """Fit the `method` to training data (X, y).

        The data are split in two halves: the first one is used to fit
        `obj`, the second one to calibrate the prediction intervals.

        Args:

            X: array-like, shape = [n_samples, n_features];
                Training set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples, ]; Target values.

        Returns:

            self

        Raises:

            ValueError: if `method` or `type_split` is not supported.

        """

        if self.method not in ("splitconformal", "localconformal"):
            raise ValueError(
                "`self.method` must be in ('splitconformal', 'localconformal')"
            )

        # Keep the fraction in sync with `level` in case it was changed
        # after construction (e.g. through `set_params`).
        self.alpha_ = 1 - self.level / 100

        if self.type_split == "random":
            X_train, X_calibration, y_train, y_calibration = train_test_split(
                X, y, test_size=0.5, random_state=self.seed
            )
        elif self.type_split == "sequential":
            n_x_half = X.shape[0] // 2
            X_train, X_calibration = X[:n_x_half, :], X[n_x_half:, :]
            y_train, y_calibration = y[:n_x_half], y[n_x_half:]
        else:
            # Previously fell through and failed later on an unbound name.
            raise ValueError(
                "`self.type_split` must be in ('random', 'sequential')"
            )

        if self.method == "splitconformal":

            self.obj.fit(X_train, y_train)
            preds_calibration = self.obj.predict(X_calibration)
            # np.asarray: `.reshape` below needs an ndarray, not e.g. a
            # pandas Series.
            self.calibrated_residuals_ = np.asarray(
                y_calibration
            ) - np.asarray(preds_calibration)
            absolute_residuals = np.abs(self.calibrated_residuals_)
            self.calibrated_residuals_scaler_ = StandardScaler(
                with_mean=True, with_std=True
            )
            self.scaled_calibrated_residuals_ = (
                self.calibrated_residuals_scaler_.fit_transform(
                    self.calibrated_residuals_.reshape(-1, 1)
                ).ravel()
            )
            try:
                # numpy version >= 1.22
                self.quantile_ = np.quantile(
                    a=absolute_residuals, q=self.level / 100, method="higher"
                )
            except TypeError:
                # numpy version < 1.22 does not know the `method` keyword
                self.quantile_ = np.quantile(
                    a=absolute_residuals,
                    q=self.level / 100,
                    interpolation="higher",
                )

        if self.method == "localconformal":

            mad_estimator = ExtraTreesRegressor()
            normalizer = RegressorNormalizer(
                self.obj, mad_estimator, AbsErrorErrFunc()
            )
            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
            self.icp_ = IcpRegressor(nc)
            self.icp_.fit(X_train, y_train)
            self.icp_.calibrate(X_calibration, y_calibration)

        return self

    def predict(self, X, return_pi=False):
        """Obtain predictions and prediction intervals

        Args:

            X: array-like, shape = [n_samples, n_features];
                Testing set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            return_pi: boolean
                Whether the prediction interval is returned or not.
                Default is False, for compatibility with other _estimators_.
                If True, a tuple containing the predictions + lower and upper
                bounds is returned. Ignored when `replications` is not None.

        Returns:

            The predictions of `obj` when `replications` is None and
            `return_pi` is False; a namedtuple ("mean", "lower", "upper")
            when `replications` is None and `return_pi` is True; a
            namedtuple ("mean", "sims", "lower", "upper") when
            `replications` is not None.

        Raises:

            ValueError: if `method` is not supported, or if simulations are
                requested while no calibration residuals are available.

        """

        pred = self.obj.predict(X)

        if self.method == "splitconformal":

            if self.replications is not None:
                return self._simulate_intervals(
                    pred, X.shape[0], progress=False
                )

            if not return_pi:
                return pred

            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "lower", "upper")
            )
            return DescribeResult(
                pred, pred - self.quantile_, pred + self.quantile_
            )

        if self.method == "localconformal":

            if self.replications is not None:
                return self._simulate_intervals(
                    pred, X.shape[0], progress=True
                )

            if not return_pi:
                return pred

            # Significance is the miscoverage rate as a fraction; the former
            # `1 - self.level` evaluated to -94 for level=95.
            predictions_bounds = self.icp_.predict(
                X, significance=1 - self.level / 100
            )
            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "lower", "upper")
            )
            return DescribeResult(
                pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
            )

        raise ValueError(
            "`self.method` must be in ('splitconformal', 'localconformal')"
        )

    def _simulate_intervals(self, pred, n_obs, progress=False):
        """Simulate predictions from the calibration residuals.

        Stores `sims_`, `mean_`, `lower_`, `upper_` (and `residuals_sims_`
        or `kde_`, depending on `type_pi`) on the instance.

        Args:

            pred: point predictions of `obj`, one per observation.

            n_obs: an integer; number of observations to simulate for.

            progress: boolean; whether a tqdm progress bar is shown.

        Returns:

            a namedtuple ("mean", "sims", "lower", "upper").

        """

        assert self.type_pi in (
            "bootstrap",
            "kde",
        ), "`self.type_pi` must be in ('bootstrap', 'kde')"

        if self.scaled_calibrated_residuals_ is None:
            # Residuals are only stored by `fit` for "splitconformal";
            # without them numpy/scipy would fail with an obscure message.
            raise ValueError(
                "no calibrated residuals available for simulation: "
                "call `fit` with method='splitconformal' first"
            )

        scale = self.calibrated_residuals_scaler_.scale_[0]
        steps = range(self.replications)
        if progress:
            steps = tqdm(steps)

        if self.type_pi == "bootstrap":
            # Local generator: same stream as np.random.seed(self.seed)
            # followed by np.random.choice, without touching global state.
            rng = np.random.RandomState(self.seed)
            self.residuals_sims_ = np.asarray(
                [
                    rng.choice(
                        a=self.scaled_calibrated_residuals_,
                        size=n_obs,
                    )
                    for _ in range(self.replications)
                ]
            ).T
            self.sims_ = np.asarray(
                [
                    pred + scale * self.residuals_sims_[:, i].ravel()
                    for i in steps
                ]
            ).T
        else:  # self.type_pi == "kde"
            self.kde_ = gaussian_kde(
                dataset=self.scaled_calibrated_residuals_
            )
            self.sims_ = np.asarray(
                [
                    pred
                    + scale
                    * self.kde_.resample(
                        size=n_obs, seed=self.seed + i
                    ).ravel()
                    for i in steps
                ]
            ).T

        # Two-sided interval: alpha/2 in each tail, alpha being a fraction.
        alpha = 1 - self.level / 100
        self.mean_ = np.mean(self.sims_, axis=1)
        self.lower_ = np.quantile(self.sims_, q=alpha / 2, axis=1)
        self.upper_ = np.quantile(self.sims_, q=1 - alpha / 2, axis=1)

        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "sims", "lower", "upper")
        )

        return DescribeResult(
            self.mean_, self.sims_, self.lower_, self.upper_
        )
@@ -0,0 +1,3 @@
1
+ from .predictionset import PredictionSet
2
+
3
+ __all__ = ["PredictionSet"]
@@ -0,0 +1,111 @@
1
+ from locale import normalize
2
+ import numpy as np
3
+ import pickle
4
+ from collections import namedtuple
5
+ from sklearn.base import BaseEstimator, ClassifierMixin
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.ensemble import ExtraTreesRegressor
8
+ from sklearn.preprocessing import StandardScaler
9
+ from scipy.stats import gaussian_kde
10
+ from tqdm import tqdm
11
+ from ..nonconformist import ClassifierAdapter, IcpClassifier, TcpClassifier, ClassifierNc, MarginErrFunc
12
+
13
+
14
class PredictionSet(BaseEstimator, ClassifierMixin):
    """Class PredictionSet: Obtain prediction sets.

    Attributes:

        obj: an object;
            object containing methods `fit` and `predict`

        method: a string;
            method for constructing the prediction sets.
            Currently "icp" (default, inductive conformal) and "tcp"
            (transductive conformal)

        level: a float;
            Confidence level for prediction sets, in percent. Default is
            None; 95 is equivalent to a miscoverage error of 5 (%)

        seed: an integer;
            Reproducibility of fit (there's a random split between fitting
            and calibration data)
    """

    def __init__(
        self,
        obj,
        method="icp",
        level=None,
        seed=123,
    ):

        self.obj = obj
        self.method = method
        self.level = level
        self.seed = seed
        # Always defined, so that `predict` no longer fails with an
        # AttributeError when no level was given.
        self.alpha_ = self._significance()
        self.quantile_ = None
        self.icp_ = None
        self.tcp_ = None

        if self.method == "icp":
            self.icp_ = IcpClassifier(self._nonconformity())
        elif self.method == "tcp":
            self.tcp_ = TcpClassifier(self._nonconformity())
        else:
            raise ValueError(
                "`self.method` must be in ('icp', 'tcp')"
            )

    def _significance(self):
        """Miscoverage rate as a fraction, or None when `level` is None."""
        if self.level is None:
            return None
        return 1 - self.level / 100

    def _nonconformity(self):
        """Margin-error nonconformity function wrapped around `obj`."""
        return ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc())

    def fit(self, X, y):
        """Fit the `method` to training data (X, y).

        For "icp", the data are randomly split in two halves: one to fit
        the model, one to calibrate it. For "tcp", all the data are used.

        Args:

            X: array-like, shape = [n_samples, n_features];
                Training set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples, ]; Target values.

        Returns:

            self

        """
        if self.method == "icp":

            X_train, X_calibration, y_train, y_calibration = train_test_split(
                X, y, test_size=0.5, random_state=self.seed
            )
            self.icp_.fit(X_train, y_train)
            self.icp_.calibrate(X_calibration, y_calibration)

        elif self.method == "tcp":

            self.tcp_.fit(X, y)

        return self

    def predict(self, X):
        """Obtain predictions and prediction sets

        Args:

            X: array-like, shape = [n_samples, n_features];
                Testing set vectors, where n_samples is the number
                of samples and n_features is the number of features.

        Returns:

            Whatever the underlying conformal classifier's `predict`
            returns for the requested significance.

        Raises:

            ValueError: if `method` is not supported.

        """

        # Derived from the current `level`, so a later `set_params` is
        # honoured. NOTE(review): with level=None, `significance=None` is
        # forwarded; presumably the vendored nonconformist classifiers then
        # return p-values instead of sets -- confirm against icp.py / cp.py.
        significance = self._significance()

        if self.method == "icp":
            return self.icp_.predict(X, significance=significance)

        elif self.method == "tcp":
            return self.tcp_.predict(X, significance=significance)

        else:
            raise ValueError(
                "`self.method` must be in ('icp', 'tcp')"
            )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unifiedbooster
3
- Version: 0.5.0
3
+ Version: 0.7.0
4
4
  Summary: Unified interface for Gradient Boosted Decision Trees
5
5
  Home-page: https://github.com/thierrymoudiki/unifiedbooster
6
6
  Author: T. Moudiki
@@ -0,0 +1,23 @@
1
+ unifiedbooster/__init__.py,sha256=8FEkWCZ2tT8xcW46Z0X_BS9_r0kQWVAu37IncLq6QWU,301
2
+ unifiedbooster/gbdt.py,sha256=oAG-dQRY3FG9Tdhdb0iZuupMOAj1_KcGQbp47AHc72o,5175
3
+ unifiedbooster/gbdt_classification.py,sha256=c9MYlPeTjQ4pAy0CZtroid9UfhQAlQVHekCWbbTIMBQ,5798
4
+ unifiedbooster/gbdt_regression.py,sha256=tHi8XJ1jS2LuXdQoRDsTkFK3qt3L-4kQ9IRsMNW37gI,5351
5
+ unifiedbooster/gpoptimization.py,sha256=UoT20E5dfhREiY7Cqo0vCktBzDBRnnG_6Xyg426vdfk,15238
6
+ unifiedbooster/nonconformist/__init__.py,sha256=rETO9FfHb_yWs4ttLa2FJb2NAy-KFnyESeBTltDwJQA,739
7
+ unifiedbooster/nonconformist/acp.py,sha256=SrfBVCWjXvntkBJ7GXTFYE6i6NU3Pv-5ibwhpItDKDw,11553
8
+ unifiedbooster/nonconformist/base.py,sha256=Ycyt6pwxo0QjD3qBAfDqjzFvFfknIMkX0_yIc6EtPFo,4028
9
+ unifiedbooster/nonconformist/cp.py,sha256=YKiBFKwvaJbWnJcgi-saiVD_2ci-LBDHgytf70jHvFg,6174
10
+ unifiedbooster/nonconformist/evaluation.py,sha256=b24buhhW3v3CKRSi69WKCq9Sb38Unmjr8TAZr66Cdns,15906
11
+ unifiedbooster/nonconformist/icp.py,sha256=wqOaoy22KiF2ebVQOjp8MR-zvEjT0hE0NiMfeNZOQEw,15982
12
+ unifiedbooster/nonconformist/nc.py,sha256=_ED8Yn068Ivio9Xr0SjwKh4Ts5MfUACZFY40ObxPJ60,19644
13
+ unifiedbooster/nonconformist/util.py,sha256=UBKlAEb0mj9MVWBOKCRAq_OQP5Z53FMqWlTyo7RIg5Q,242
14
+ unifiedbooster/predictioninterval/__init__.py,sha256=I1X1omp6Bsuzfm7z8TCSICe2175rHrdoXWEDOicOP8U,85
15
+ unifiedbooster/predictioninterval/predictioninterval.py,sha256=6XQnJQDpsWG-uu5yFxeZQewnrErAjZLzv21YvtarXZQ,11164
16
+ unifiedbooster/predictionset/__init__.py,sha256=IGhWVX8-VeZ15HeLFWu8QeKCz7DIE4TlEXMjTnB3VdE,70
17
+ unifiedbooster/predictionset/predictionset.py,sha256=k9s2PYK2KvOfDoGfSGXUHwwNA9kL2VYiT2JPokwZ8YA,3415
18
+ unifiedbooster-0.7.0.dist-info/LICENSE,sha256=3rWw63btcdqbC0XMnpzCQhxDP8Vx7yKkKS7EDgJiY_4,1061
19
+ unifiedbooster-0.7.0.dist-info/METADATA,sha256=7vR-c8aCOeF-96Uv9uBTugKmA-QC71b_5NyejATpnDM,955
20
+ unifiedbooster-0.7.0.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
21
+ unifiedbooster-0.7.0.dist-info/entry_points.txt,sha256=OVNTsCzMYnaJ11WIByB7G8Lym_dj-ERKZyQxWFUcW30,59
22
+ unifiedbooster-0.7.0.dist-info/top_level.txt,sha256=gOMxxpRtx8_nJXTWsXJDFkNeCsjSJQPs6aUXKK5_nI4,15
23
+ unifiedbooster-0.7.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (72.1.0)
2
+ Generator: setuptools (74.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,11 +0,0 @@
1
- unifiedbooster/__init__.py,sha256=8FEkWCZ2tT8xcW46Z0X_BS9_r0kQWVAu37IncLq6QWU,301
2
- unifiedbooster/gbdt.py,sha256=u5Sjw-V8BlDS4LUo_SNOfuz66EFcJhP1Al6Es41R_X8,4932
3
- unifiedbooster/gbdt_classification.py,sha256=wifw86cUvsyiKSz8MTxIgH6j7Gd1voIxXUiJVsE68bk,4219
4
- unifiedbooster/gbdt_regression.py,sha256=YQIDtW4hV7DxHAHuoMMkD1aRy0dzVXxx2rwPu3InTA8,3710
5
- unifiedbooster/gpoptimization.py,sha256=S-yZI8qI_QZyoCqWj8MT0a2Djlo3YrYRjyXApLS9FXM,12831
6
- unifiedbooster-0.5.0.dist-info/LICENSE,sha256=3rWw63btcdqbC0XMnpzCQhxDP8Vx7yKkKS7EDgJiY_4,1061
7
- unifiedbooster-0.5.0.dist-info/METADATA,sha256=mao-q4w_f26KVwKSy4ZPEJBZQIRARtXsWEN7t7JEwRw,955
8
- unifiedbooster-0.5.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
9
- unifiedbooster-0.5.0.dist-info/entry_points.txt,sha256=OVNTsCzMYnaJ11WIByB7G8Lym_dj-ERKZyQxWFUcW30,59
10
- unifiedbooster-0.5.0.dist-info/top_level.txt,sha256=gOMxxpRtx8_nJXTWsXJDFkNeCsjSJQPs6aUXKK5_nI4,15
11
- unifiedbooster-0.5.0.dist-info/RECORD,,