unifiedbooster 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,314 @@
1
+ from locale import normalize
2
+ import numpy as np
3
+ import pickle
4
+ from collections import namedtuple
5
+ from sklearn.base import BaseEstimator, RegressorMixin
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.ensemble import ExtraTreesRegressor
8
+ from sklearn.preprocessing import StandardScaler
9
+ from sklearn.neighbors import KernelDensity
10
+ from sklearn.model_selection import GridSearchCV
11
+ from scipy.stats import gaussian_kde
12
+ from tqdm import tqdm
13
+ from ..nonconformist import IcpRegressor
14
+ from ..nonconformist import RegressorNc
15
+ from ..nonconformist import RegressorNormalizer, AbsErrorErrFunc
16
+
17
+
18
class PredictionInterval(BaseEstimator, RegressorMixin):
    """Class PredictionInterval: Obtain prediction intervals.

    Attributes:

        obj: an object;
            object containing methods `fit` and `predict`
            (it is fitted by this class on half of the data)

        method: a string;
            method for constructing the prediction intervals.
            Currently "splitconformal" (default) and "localconformal"

        level: a float;
            Confidence level for prediction intervals, in percent.
            Default is 95, equivalent to a miscoverage error of 5 (%)

        type_pi: a string;
            type of simulated prediction interval, only used when
            `replications` is not None: "bootstrap" (default) or "kde"

        type_split: a string;
            "random" (random split of data) or "sequential"
            (sequential split of data)

        replications: an integer;
            Number of replications for simulated conformal
            (default is `None`)

        kernel: stored on the instance; not used by this class

        agg: stored on the instance; not used by this class

        seed: an integer;
            Reproducibility of fit (there's a random split between
            fitting and calibration data) and of the simulations
    """

    def __init__(
        self,
        obj,
        method="splitconformal",
        level=95,
        type_pi="bootstrap",
        type_split="random",
        replications=None,
        kernel=None,
        agg="mean",
        seed=123,
    ):

        self.obj = obj
        self.method = method
        self.level = level
        self.type_pi = type_pi
        self.type_split = type_split
        self.replications = replications
        self.kernel = kernel
        self.agg = agg
        self.seed = seed
        # miscoverage rate as a fraction in (0, 1), e.g. 0.05 for level=95
        self.alpha_ = 1 - self.level / 100
        self.quantile_ = None
        self.icp_ = None
        self.calibrated_residuals_ = None
        self.scaled_calibrated_residuals_ = None
        self.calibrated_residuals_scaler_ = None
        self.kde_ = None

    def _split(self, X, y):
        """Split (X, y) into two halves: training and calibration.

        Returns:

            a sequence (X_train, X_calibration, y_train, y_calibration)

        Raises:

            ValueError: if `type_split` is not "random" or "sequential"
        """
        if self.type_split == "random":
            return train_test_split(
                X, y, test_size=0.5, random_state=self.seed
            )
        if self.type_split == "sequential":
            n_x_half = X.shape[0] // 2
            return (
                X[:n_x_half, :],
                X[n_x_half:, :],
                y[:n_x_half],
                y[n_x_half:],
            )
        raise ValueError(
            "`self.type_split` must be in ('random', 'sequential')"
        )

    @staticmethod
    def _higher_quantile(values, q):
        """Empirical quantile of `values`, rounded up to an observed value."""
        try:
            # numpy version >= 1.22
            return np.quantile(a=values, q=q, method="higher")
        except TypeError:
            # numpy version < 1.22 does not know the `method` keyword
            return np.quantile(a=values, q=q, interpolation="higher")

    def _store_calibration_residuals(self, X_calibration, y_calibration):
        """Compute and store raw and standardized calibration residuals."""
        preds_calibration = self.obj.predict(X_calibration)
        self.calibrated_residuals_ = np.asarray(y_calibration) - np.asarray(
            preds_calibration
        )
        self.calibrated_residuals_scaler_ = StandardScaler(
            with_mean=True, with_std=True
        )
        self.scaled_calibrated_residuals_ = (
            self.calibrated_residuals_scaler_.fit_transform(
                self.calibrated_residuals_.reshape(-1, 1)
            ).ravel()
        )

    def fit(self, X, y):
        """Fit the `method` to training data (X, y).

        Args:

            X: array-like, shape = [n_samples, n_features];
                Training set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples, ]; Target values.

        Returns:

            self

        Raises:

            ValueError: if `method` or `type_split` is not supported

        """
        if self.method not in ("splitconformal", "localconformal"):
            raise ValueError(
                "`self.method` must be in ('splitconformal', 'localconformal')"
            )

        # recomputed here so that `set_params(level=...)` is honoured
        self.alpha_ = 1 - self.level / 100

        X_train, X_calibration, y_train, y_calibration = self._split(X, y)

        if self.method == "splitconformal":

            self.obj.fit(X_train, y_train)
            self._store_calibration_residuals(X_calibration, y_calibration)
            self.quantile_ = self._higher_quantile(
                np.abs(self.calibrated_residuals_), self.level / 100
            )

        else:  # "localconformal"

            mad_estimator = ExtraTreesRegressor()
            normalizer = RegressorNormalizer(
                self.obj, mad_estimator, AbsErrorErrFunc()
            )
            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
            self.icp_ = IcpRegressor(nc)
            self.icp_.fit(X_train, y_train)
            self.icp_.calibrate(X_calibration, y_calibration)
            # The simulation path of `predict` resamples these residuals;
            # without them `replications` could not be used with this method.
            # Relies on `self.obj` being fitted by `self.icp_.fit`, exactly
            # as `predict` already does when calling `self.obj.predict`.
            self._store_calibration_residuals(X_calibration, y_calibration)

        return self

    def _simulate(self, X, pred, alpha, progress=False):
        """Simulate predictions by adding resampled calibration residuals.

        Args:

            X: test set, only its number of rows is used

            pred: point predictions of `self.obj` on X

            alpha: miscoverage rate as a fraction in (0, 1)

            progress: whether a tqdm progress bar wraps the simulation loop

        Returns:

            a namedtuple with fields "mean", "sims", "lower" and "upper"
        """
        assert self.type_pi in (
            "bootstrap",
            "kde",
        ), "`self.type_pi` must be in ('bootstrap', 'kde')"

        n_obs = X.shape[0]
        scale = self.calibrated_residuals_scaler_.scale_[0]
        replications = range(self.replications)
        iterator = tqdm(replications) if progress else replications

        if self.type_pi == "bootstrap":
            np.random.seed(self.seed)
            self.residuals_sims_ = np.asarray(
                [
                    np.random.choice(
                        a=self.scaled_calibrated_residuals_,
                        size=n_obs,
                    )
                    for _ in range(self.replications)
                ]
            ).T
            self.sims_ = np.asarray(
                [
                    pred + scale * self.residuals_sims_[:, i].ravel()
                    for i in iterator
                ]
            ).T
        elif self.type_pi == "kde":
            self.kde_ = gaussian_kde(
                dataset=self.scaled_calibrated_residuals_
            )
            self.sims_ = np.asarray(
                [
                    pred
                    + scale
                    * self.kde_.resample(
                        size=n_obs, seed=self.seed + i
                    ).ravel()
                    for i in iterator
                ]
            ).T

        self.mean_ = np.mean(self.sims_, axis=1)
        # `alpha` is already a fraction, so each tail gets alpha / 2
        self.lower_ = np.quantile(self.sims_, q=alpha / 2, axis=1)
        self.upper_ = np.quantile(self.sims_, q=1 - alpha / 2, axis=1)

        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "sims", "lower", "upper")
        )

        return DescribeResult(
            self.mean_, self.sims_, self.lower_, self.upper_
        )

    def predict(self, X, return_pi=False):
        """Obtain predictions and prediction intervals

        Args:

            X: array-like, shape = [n_samples, n_features];
                Testing set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            return_pi: boolean
                Whether the prediction interval is returned or not.
                Default is False, for compatibility with other _estimators_.
                If True, a tuple containing the predictions + lower and upper
                bounds is returned. Ignored when `replications` is not None:
                simulations, mean and bounds are then always returned.

        Returns:

            the point predictions, or a namedtuple ("mean", "lower", "upper"),
            or, with replications, ("mean", "sims", "lower", "upper")

        Raises:

            ValueError: if `method` is not supported

        """
        if self.method not in ("splitconformal", "localconformal"):
            raise ValueError(
                "`self.method` must be in ('splitconformal', 'localconformal')"
            )

        pred = self.obj.predict(X)
        # derived from `level` at call time so it can never be stale
        alpha = 1 - self.level / 100

        if self.replications is not None:
            # progress bar only for "localconformal", as before
            return self._simulate(
                X, pred, alpha, progress=(self.method == "localconformal")
            )

        if not return_pi:
            return pred

        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )

        if self.method == "splitconformal":
            return DescribeResult(
                pred, pred - self.quantile_, pred + self.quantile_
            )

        # "localconformal": significance is an error rate in (0, 1)
        predictions_bounds = self.icp_.predict(X, significance=alpha)
        return DescribeResult(
            pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
        )
@@ -0,0 +1,3 @@
1
+ from .predictionset import PredictionSet
2
+
3
+ __all__ = ["PredictionSet"]
@@ -0,0 +1,113 @@
1
+ from locale import normalize
2
+ import numpy as np
3
+ import pickle
4
+ from collections import namedtuple
5
+ from sklearn.base import BaseEstimator, ClassifierMixin
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.ensemble import ExtraTreesRegressor
8
+ from sklearn.preprocessing import StandardScaler
9
+ from scipy.stats import gaussian_kde
10
+ from tqdm import tqdm
11
+ from ..nonconformist import (
12
+ ClassifierAdapter,
13
+ IcpClassifier,
14
+ TcpClassifier,
15
+ ClassifierNc,
16
+ MarginErrFunc,
17
+ )
18
+
19
+
20
class PredictionSet(BaseEstimator, ClassifierMixin):
    """Class PredictionSet: Obtain prediction sets.

    Attributes:

        obj: an object;
            object containing methods `fit` and `predict`

        method: a string;
            method for constructing the prediction sets.
            Currently "icp" (default, inductive conformal) and "tcp"
            (transductive conformal)

        level: a float;
            Confidence level for prediction sets, in percent (95 is
            equivalent to a miscoverage error of 5 (%)). Default is None,
            in which case `significance=None` is passed to the underlying
            conformal predictor.

        seed: an integer;
            Reproducibility of fit (there's a random split between fitting
            and calibration data)
    """

    def __init__(
        self,
        obj,
        method="icp",
        level=None,
        seed=123,
    ):

        self.obj = obj
        self.method = method
        self.level = level
        self.seed = seed
        # Always defined, so that `predict` does not fail with an
        # AttributeError when `level` is left to its default (None).
        self.alpha_ = self._significance()
        self.quantile_ = None
        self.icp_ = None
        self.tcp_ = None

        if self.method == "icp":
            self.icp_ = IcpClassifier(
                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
            )
        elif self.method == "tcp":
            self.tcp_ = TcpClassifier(
                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
            )
        else:
            raise ValueError("`self.method` must be in ('icp', 'tcp')")

    def _significance(self):
        """Miscoverage rate as a fraction in (0, 1), or None without level."""
        if self.level is None:
            return None
        return 1 - self.level / 100

    def fit(self, X, y):
        """Fit the `method` to training data (X, y).

        Args:

            X: array-like, shape = [n_samples, n_features];
                Training set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples, ]; Target values.

        Returns:

            self

        Raises:

            ValueError: if `method` is not "icp" or "tcp"

        """
        # recomputed here so that `set_params(level=...)` is honoured
        self.alpha_ = self._significance()

        if self.method == "icp":

            # half of the data fits the model, the other half calibrates it
            X_train, X_calibration, y_train, y_calibration = train_test_split(
                X, y, test_size=0.5, random_state=self.seed
            )
            self.icp_.fit(X_train, y_train)
            self.icp_.calibrate(X_calibration, y_calibration)

        elif self.method == "tcp":

            self.tcp_.fit(X, y)

        else:
            raise ValueError("`self.method` must be in ('icp', 'tcp')")

        return self

    def predict(self, X):
        """Obtain predictions and prediction sets

        Args:

            X: array-like, shape = [n_samples, n_features];
                Testing set vectors, where n_samples is the number
                of samples and n_features is the number of features.

        Returns:

            whatever the underlying conformal classifier's `predict` returns
            for the requested significance.
            NOTE(review): with `level=None`, `significance=None` is passed;
            upstream nonconformist returns p-values in that case -- confirm
            against the vendored `..nonconformist` package.

        Raises:

            ValueError: if `method` is not "icp" or "tcp"

        """
        # derived from `level` at call time so it can never be stale
        significance = self._significance()

        if self.method == "icp":
            return self.icp_.predict(X, significance=significance)

        elif self.method == "tcp":
            return self.tcp_.predict(X, significance=significance)

        else:
            raise ValueError("`self.method` must be in ('icp', 'tcp')")
@@ -0,0 +1,39 @@
1
+ Metadata-Version: 2.4
2
+ Name: unifiedbooster
3
+ Version: 0.9.0
4
+ Summary: Unified interface for Gradient Boosted Decision Trees
5
+ Home-page: https://github.com/thierrymoudiki/unifiedbooster
6
+ Author: T. Moudiki
7
+ Author-email: thierry.moudiki@gmail.com
8
+ License: BSD license
9
+ Keywords: unifiedbooster
10
+ Classifier: Development Status :: 2 - Pre-Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: BSD License
13
+ Classifier: Natural Language :: English
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.6
16
+ Classifier: Programming Language :: Python :: 3.7
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Requires-Python: >=3.6
19
+ License-File: LICENSE
20
+ Requires-Dist: Cython
21
+ Requires-Dist: numpy
22
+ Requires-Dist: scikit-learn
23
+ Requires-Dist: xgboost
24
+ Requires-Dist: lightgbm
25
+ Requires-Dist: GPopt
26
+ Requires-Dist: nnetsauce
27
+ Dynamic: author
28
+ Dynamic: author-email
29
+ Dynamic: classifier
30
+ Dynamic: description
31
+ Dynamic: home-page
32
+ Dynamic: keywords
33
+ Dynamic: license
34
+ Dynamic: license-file
35
+ Dynamic: requires-dist
36
+ Dynamic: requires-python
37
+ Dynamic: summary
38
+
39
+ Unified interface for Gradient Boosted Decision Trees
@@ -0,0 +1,23 @@
1
+ unifiedbooster/__init__.py,sha256=8FEkWCZ2tT8xcW46Z0X_BS9_r0kQWVAu37IncLq6QWU,301
2
+ unifiedbooster/gbdt.py,sha256=FSaZngKlFR943cq1QclZlsOFjM6tmX446e6GeoGtA6Q,5176
3
+ unifiedbooster/gbdt_classification.py,sha256=NbmngLNGt4X1xuTIFKDVP4bS5ieAU_rNu_axSpbbJq0,5795
4
+ unifiedbooster/gbdt_regression.py,sha256=5YjnOlPJOBcuYJN5lOFWA8VLYhIIVmkmceRjCJ4GdjA,5722
5
+ unifiedbooster/gpoptimization.py,sha256=UoT20E5dfhREiY7Cqo0vCktBzDBRnnG_6Xyg426vdfk,15238
6
+ unifiedbooster/nonconformist/__init__.py,sha256=sHEakjPhqUhmZwawAv34bHcTDmF1uZvqvGLIMjOM0B0,739
7
+ unifiedbooster/nonconformist/acp.py,sha256=SrfBVCWjXvntkBJ7GXTFYE6i6NU3Pv-5ibwhpItDKDw,11553
8
+ unifiedbooster/nonconformist/base.py,sha256=3nvSL_rL1Kxkj-lI5rEuMuK7fZyfrFqKKS1-UMdcLNA,4024
9
+ unifiedbooster/nonconformist/cp.py,sha256=YKiBFKwvaJbWnJcgi-saiVD_2ci-LBDHgytf70jHvFg,6174
10
+ unifiedbooster/nonconformist/evaluation.py,sha256=b24buhhW3v3CKRSi69WKCq9Sb38Unmjr8TAZr66Cdns,15906
11
+ unifiedbooster/nonconformist/icp.py,sha256=wqOaoy22KiF2ebVQOjp8MR-zvEjT0hE0NiMfeNZOQEw,15982
12
+ unifiedbooster/nonconformist/nc.py,sha256=_ED8Yn068Ivio9Xr0SjwKh4Ts5MfUACZFY40ObxPJ60,19644
13
+ unifiedbooster/nonconformist/util.py,sha256=UBKlAEb0mj9MVWBOKCRAq_OQP5Z53FMqWlTyo7RIg5Q,242
14
+ unifiedbooster/predictioninterval/__init__.py,sha256=I1X1omp6Bsuzfm7z8TCSICe2175rHrdoXWEDOicOP8U,85
15
+ unifiedbooster/predictioninterval/predictioninterval.py,sha256=6XQnJQDpsWG-uu5yFxeZQewnrErAjZLzv21YvtarXZQ,11164
16
+ unifiedbooster/predictionset/__init__.py,sha256=IGhWVX8-VeZ15HeLFWu8QeKCz7DIE4TlEXMjTnB3VdE,70
17
+ unifiedbooster/predictionset/predictionset.py,sha256=C38rC1qAhhXa8YUJjVB3yjYjPXToU1HVXBRoBevsRxk,3308
18
+ unifiedbooster-0.9.0.dist-info/licenses/LICENSE,sha256=3rWw63btcdqbC0XMnpzCQhxDP8Vx7yKkKS7EDgJiY_4,1061
19
+ unifiedbooster-0.9.0.dist-info/METADATA,sha256=0Az8dhNCauLHJYfj55TKsIgBUX_RhZ5_VksqFcNVIMs,1151
20
+ unifiedbooster-0.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
+ unifiedbooster-0.9.0.dist-info/entry_points.txt,sha256=OVNTsCzMYnaJ11WIByB7G8Lym_dj-ERKZyQxWFUcW30,59
22
+ unifiedbooster-0.9.0.dist-info/top_level.txt,sha256=gOMxxpRtx8_nJXTWsXJDFkNeCsjSJQPs6aUXKK5_nI4,15
23
+ unifiedbooster-0.9.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ unifiedbooster = unifiedbooster.cli:main
@@ -0,0 +1,7 @@
1
+ Copyright <2024> <T. Moudiki>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1 @@
1
+ unifiedbooster