unifiedbooster 0.7.0__tar.gz → 0.9.0__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/PKG-INFO +13 -3
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/README.md +4 -1
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/setup.py +1 -3
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/gbdt.py +1 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/gbdt_classification.py +32 -23
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/gbdt_regression.py +44 -25
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/__init__.py +2 -2
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/base.py +1 -1
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/predictionset/predictionset.py +19 -17
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/PKG-INFO +13 -3
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/requires.txt +0 -1
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/LICENSE +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/setup.cfg +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/__init__.py +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/gpoptimization.py +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/acp.py +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/cp.py +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/evaluation.py +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/icp.py +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/nc.py +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/util.py +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/predictioninterval/__init__.py +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/predictioninterval/predictioninterval.py +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster/predictionset/__init__.py +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/SOURCES.txt +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/dependency_links.txt +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/entry_points.txt +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/not-zip-safe +0 -0
- {unifiedbooster-0.7.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: unifiedbooster
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 0.9.0
|
|
4
4
|
Summary: Unified interface for Gradient Boosted Decision Trees
|
|
5
5
|
Home-page: https://github.com/thierrymoudiki/unifiedbooster
|
|
6
6
|
Author: T. Moudiki
|
|
@@ -22,8 +22,18 @@ Requires-Dist: numpy
|
|
|
22
22
|
Requires-Dist: scikit-learn
|
|
23
23
|
Requires-Dist: xgboost
|
|
24
24
|
Requires-Dist: lightgbm
|
|
25
|
-
Requires-Dist: catboost
|
|
26
25
|
Requires-Dist: GPopt
|
|
27
26
|
Requires-Dist: nnetsauce
|
|
27
|
+
Dynamic: author
|
|
28
|
+
Dynamic: author-email
|
|
29
|
+
Dynamic: classifier
|
|
30
|
+
Dynamic: description
|
|
31
|
+
Dynamic: home-page
|
|
32
|
+
Dynamic: keywords
|
|
33
|
+
Dynamic: license
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
Dynamic: requires-dist
|
|
36
|
+
Dynamic: requires-python
|
|
37
|
+
Dynamic: summary
|
|
28
38
|
|
|
29
39
|
Unified interface for Gradient Boosted Decision Trees
|
|
@@ -7,7 +7,10 @@ Unified interface for Gradient Boosted Decision Trees algorithms
|
|
|
7
7
|
|
|
8
8
|
## Examples
|
|
9
9
|
|
|
10
|
-
See also
|
|
10
|
+
See also:
|
|
11
|
+
- Auto XGBoost, Auto LightGBM, Auto CatBoost, Auto GradientBoosting: https://thierrymoudiki.github.io/blog/2024/08/05/python/r/unibooster
|
|
12
|
+
- Prediction sets and prediction intervals for conformalized Auto XGBoost, Auto LightGBM, Auto CatBoost, Auto GradientBoosting: https://thierrymoudiki.github.io/blog/2024/09/02/python/r/conformalized-unibooster
|
|
13
|
+
- Notebooks in [/unifiedbooster/demo](/unifiedbooster/demo)
|
|
11
14
|
|
|
12
15
|
### classification
|
|
13
16
|
|
|
@@ -40,10 +40,10 @@ class GBDTClassifier(GBDT, ClassifierMixin):
|
|
|
40
40
|
|
|
41
41
|
colsample: float
|
|
42
42
|
percentage of features to use at each node split
|
|
43
|
-
|
|
43
|
+
|
|
44
44
|
level: float
|
|
45
45
|
confidence level for prediction sets
|
|
46
|
-
|
|
46
|
+
|
|
47
47
|
pi_method: str
|
|
48
48
|
method for constructing the prediction intervals: 'icp' (inductive conformal), 'tcp' (transductive conformal)
|
|
49
49
|
|
|
@@ -95,6 +95,7 @@ class GBDTClassifier(GBDT, ClassifierMixin):
|
|
|
95
95
|
print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
|
|
96
96
|
```
|
|
97
97
|
"""
|
|
98
|
+
|
|
98
99
|
def __init__(
|
|
99
100
|
self,
|
|
100
101
|
model_type="xgboost",
|
|
@@ -128,34 +129,42 @@ class GBDTClassifier(GBDT, ClassifierMixin):
|
|
|
128
129
|
|
|
129
130
|
if self.level is not None:
|
|
130
131
|
|
|
131
|
-
if model_type
|
|
132
|
-
self.model = PredictionSet(
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
elif model_type
|
|
144
|
-
self.model = PredictionSet(
|
|
145
|
-
|
|
146
|
-
|
|
132
|
+
if model_type in ("xgboost", "xgb"):
|
|
133
|
+
self.model = PredictionSet(
|
|
134
|
+
XGBClassifier(**self.params),
|
|
135
|
+
level=self.level,
|
|
136
|
+
method=self.pi_method,
|
|
137
|
+
)
|
|
138
|
+
elif model_type in ("catboost", "cb"):
|
|
139
|
+
self.model = PredictionSet(
|
|
140
|
+
CatBoostClassifier(**self.params),
|
|
141
|
+
level=self.level,
|
|
142
|
+
method=self.pi_method,
|
|
143
|
+
)
|
|
144
|
+
elif model_type in ("lightgbm", "lgb"):
|
|
145
|
+
self.model = PredictionSet(
|
|
146
|
+
LGBMClassifier(**self.params),
|
|
147
|
+
level=self.level,
|
|
148
|
+
method=self.pi_method,
|
|
149
|
+
)
|
|
150
|
+
elif model_type in ("gradientboosting", "gb"):
|
|
151
|
+
self.model = PredictionSet(
|
|
152
|
+
GradientBoostingClassifier(**self.params),
|
|
153
|
+
level=self.level,
|
|
154
|
+
method=self.pi_method,
|
|
155
|
+
)
|
|
147
156
|
else:
|
|
148
157
|
raise ValueError(f"Unknown model_type: {model_type}")
|
|
149
|
-
|
|
158
|
+
|
|
150
159
|
else:
|
|
151
160
|
|
|
152
|
-
if model_type
|
|
161
|
+
if model_type in ("xgboost", "xgb"):
|
|
153
162
|
self.model = XGBClassifier(**self.params)
|
|
154
|
-
elif model_type
|
|
163
|
+
elif model_type in ("catboost", "cb"):
|
|
155
164
|
self.model = CatBoostClassifier(**self.params)
|
|
156
|
-
elif model_type
|
|
165
|
+
elif model_type in ("lightgbm", "lgb"):
|
|
157
166
|
self.model = LGBMClassifier(**self.params)
|
|
158
|
-
elif model_type
|
|
167
|
+
elif model_type in ("gradientboosting", "gb"):
|
|
159
168
|
self.model = GradientBoostingClassifier(**self.params)
|
|
160
169
|
else:
|
|
161
170
|
raise ValueError(f"Unknown model_type: {model_type}")
|
|
@@ -40,12 +40,16 @@ class GBDTRegressor(GBDT, RegressorMixin):
|
|
|
40
40
|
|
|
41
41
|
colsample: float
|
|
42
42
|
percentage of features to use at each node split
|
|
43
|
-
|
|
43
|
+
|
|
44
44
|
level: float
|
|
45
45
|
confidence level for prediction sets
|
|
46
46
|
|
|
47
47
|
pi_method: str
|
|
48
48
|
method for constructing the prediction intervals: 'splitconformal', 'localconformal'
|
|
49
|
+
|
|
50
|
+
type_split: a string;
|
|
51
|
+
Only if `level` is not `None`
|
|
52
|
+
"random" (random split of data) or "sequential" (sequential split of data)
|
|
49
53
|
|
|
50
54
|
verbose: int
|
|
51
55
|
controls verbosity (default=0)
|
|
@@ -95,6 +99,7 @@ class GBDTRegressor(GBDT, RegressorMixin):
|
|
|
95
99
|
print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}")
|
|
96
100
|
```
|
|
97
101
|
"""
|
|
102
|
+
|
|
98
103
|
def __init__(
|
|
99
104
|
self,
|
|
100
105
|
model_type="xgboost",
|
|
@@ -105,12 +110,14 @@ class GBDTRegressor(GBDT, RegressorMixin):
|
|
|
105
110
|
colsample=1.0,
|
|
106
111
|
level=None,
|
|
107
112
|
pi_method="splitconformal",
|
|
113
|
+
type_split="random",
|
|
108
114
|
verbose=0,
|
|
109
115
|
seed=123,
|
|
110
116
|
**kwargs,
|
|
111
117
|
):
|
|
112
118
|
|
|
113
|
-
self.type_fit = "regression"
|
|
119
|
+
self.type_fit = "regression"
|
|
120
|
+
self.type_split = type_split
|
|
114
121
|
|
|
115
122
|
super().__init__(
|
|
116
123
|
model_type=model_type,
|
|
@@ -128,34 +135,46 @@ class GBDTRegressor(GBDT, RegressorMixin):
|
|
|
128
135
|
|
|
129
136
|
if self.level is not None:
|
|
130
137
|
|
|
131
|
-
if model_type
|
|
132
|
-
self.model = PredictionInterval(
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
138
|
+
if model_type in ("xgboost", "xgb"):
|
|
139
|
+
self.model = PredictionInterval(
|
|
140
|
+
XGBRegressor(**self.params),
|
|
141
|
+
level=self.level,
|
|
142
|
+
method=self.pi_method,
|
|
143
|
+
type_split=self.type_split
|
|
144
|
+
)
|
|
145
|
+
elif model_type in ("catboost", "cb"):
|
|
146
|
+
self.model = PredictionInterval(
|
|
147
|
+
CatBoostRegressor(**self.params),
|
|
148
|
+
level=self.level,
|
|
149
|
+
method=self.pi_method,
|
|
150
|
+
type_split=self.type_split
|
|
151
|
+
)
|
|
152
|
+
elif model_type in ("lightgbm", "lgb"):
|
|
153
|
+
self.model = PredictionInterval(
|
|
154
|
+
LGBMRegressor(**self.params),
|
|
155
|
+
level=self.level,
|
|
156
|
+
method=self.pi_method,
|
|
157
|
+
type_split=self.type_split
|
|
158
|
+
)
|
|
159
|
+
elif model_type in ("gradientboosting", "gb"):
|
|
160
|
+
self.model = PredictionInterval(
|
|
161
|
+
GradientBoostingRegressor(**self.params),
|
|
162
|
+
level=self.level,
|
|
163
|
+
method=self.pi_method,
|
|
164
|
+
type_split=self.type_split
|
|
165
|
+
)
|
|
147
166
|
else:
|
|
148
167
|
raise ValueError(f"Unknown model_type: {model_type}")
|
|
149
|
-
|
|
150
|
-
else:
|
|
151
|
-
|
|
152
|
-
if model_type
|
|
168
|
+
|
|
169
|
+
else:
|
|
170
|
+
|
|
171
|
+
if model_type in ("xgboost", "xgb"):
|
|
153
172
|
self.model = XGBRegressor(**self.params)
|
|
154
|
-
elif model_type
|
|
173
|
+
elif model_type in ("catboost", "cb"):
|
|
155
174
|
self.model = CatBoostRegressor(**self.params)
|
|
156
|
-
elif model_type
|
|
175
|
+
elif model_type in ("lightgbm", "lgb"):
|
|
157
176
|
self.model = LGBMRegressor(**self.params)
|
|
158
|
-
elif model_type
|
|
177
|
+
elif model_type in ("gradientboosting", "gb"):
|
|
159
178
|
self.model = GradientBoostingRegressor(**self.params)
|
|
160
179
|
else:
|
|
161
180
|
raise ValueError(f"Unknown model_type: {model_type}")
|
|
@@ -108,7 +108,7 @@ class ClassifierAdapter(BaseModelAdapter, ClassifierMixin):
|
|
|
108
108
|
|
|
109
109
|
def _underlying_predict(self, x):
|
|
110
110
|
return self.model.predict_proba(x)
|
|
111
|
-
|
|
111
|
+
|
|
112
112
|
|
|
113
113
|
class RegressorAdapter(BaseModelAdapter, RegressorMixin):
|
|
114
114
|
def __init__(self, model, fit_params=None):
|
|
@@ -8,7 +8,13 @@ from sklearn.ensemble import ExtraTreesRegressor
|
|
|
8
8
|
from sklearn.preprocessing import StandardScaler
|
|
9
9
|
from scipy.stats import gaussian_kde
|
|
10
10
|
from tqdm import tqdm
|
|
11
|
-
from ..nonconformist import
|
|
11
|
+
from ..nonconformist import (
|
|
12
|
+
ClassifierAdapter,
|
|
13
|
+
IcpClassifier,
|
|
14
|
+
TcpClassifier,
|
|
15
|
+
ClassifierNc,
|
|
16
|
+
MarginErrFunc,
|
|
17
|
+
)
|
|
12
18
|
|
|
13
19
|
|
|
14
20
|
class PredictionSet(BaseEstimator, ClassifierMixin):
|
|
@@ -47,21 +53,18 @@ class PredictionSet(BaseEstimator, ClassifierMixin):
|
|
|
47
53
|
self.alpha_ = 1 - self.level / 100
|
|
48
54
|
self.quantile_ = None
|
|
49
55
|
self.icp_ = None
|
|
50
|
-
self.tcp_ = None
|
|
56
|
+
self.tcp_ = None
|
|
51
57
|
|
|
52
58
|
if self.method == "icp":
|
|
53
|
-
self.icp_ = IcpClassifier(
|
|
59
|
+
self.icp_ = IcpClassifier(
|
|
54
60
|
ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
|
|
55
61
|
)
|
|
56
62
|
elif self.method == "tcp":
|
|
57
|
-
self.tcp_ = TcpClassifier(
|
|
63
|
+
self.tcp_ = TcpClassifier(
|
|
58
64
|
ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
|
|
59
|
-
)
|
|
60
|
-
else:
|
|
61
|
-
raise ValueError(
|
|
62
|
-
"`self.method` must be in ('icp', 'tcp')"
|
|
63
65
|
)
|
|
64
|
-
|
|
66
|
+
else:
|
|
67
|
+
raise ValueError("`self.method` must be in ('icp', 'tcp')")
|
|
65
68
|
|
|
66
69
|
def fit(self, X, y):
|
|
67
70
|
"""Fit the `method` to training data (X, y).
|
|
@@ -74,13 +77,14 @@ class PredictionSet(BaseEstimator, ClassifierMixin):
|
|
|
74
77
|
|
|
75
78
|
y: array-like, shape = [n_samples, ]; Target values.
|
|
76
79
|
|
|
77
|
-
"""
|
|
80
|
+
"""
|
|
78
81
|
if self.method == "icp":
|
|
79
82
|
|
|
80
83
|
X_train, X_calibration, y_train, y_calibration = train_test_split(
|
|
81
|
-
|
|
84
|
+
X, y, test_size=0.5, random_state=self.seed
|
|
85
|
+
)
|
|
82
86
|
self.icp_.fit(X_train, y_train)
|
|
83
|
-
self.icp_.calibrate(X_calibration, y_calibration)
|
|
87
|
+
self.icp_.calibrate(X_calibration, y_calibration)
|
|
84
88
|
|
|
85
89
|
elif self.method == "tcp":
|
|
86
90
|
|
|
@@ -101,11 +105,9 @@ class PredictionSet(BaseEstimator, ClassifierMixin):
|
|
|
101
105
|
|
|
102
106
|
if self.method == "icp":
|
|
103
107
|
return self.icp_.predict(X, significance=self.alpha_)
|
|
104
|
-
|
|
108
|
+
|
|
105
109
|
elif self.method == "tcp":
|
|
106
110
|
return self.tcp_.predict(X, significance=self.alpha_)
|
|
107
|
-
|
|
111
|
+
|
|
108
112
|
else:
|
|
109
|
-
raise ValueError(
|
|
110
|
-
"`self.method` must be in ('icp', 'tcp')"
|
|
111
|
-
)
|
|
113
|
+
raise ValueError("`self.method` must be in ('icp', 'tcp')")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: unifiedbooster
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 0.9.0
|
|
4
4
|
Summary: Unified interface for Gradient Boosted Decision Trees
|
|
5
5
|
Home-page: https://github.com/thierrymoudiki/unifiedbooster
|
|
6
6
|
Author: T. Moudiki
|
|
@@ -22,8 +22,18 @@ Requires-Dist: numpy
|
|
|
22
22
|
Requires-Dist: scikit-learn
|
|
23
23
|
Requires-Dist: xgboost
|
|
24
24
|
Requires-Dist: lightgbm
|
|
25
|
-
Requires-Dist: catboost
|
|
26
25
|
Requires-Dist: GPopt
|
|
27
26
|
Requires-Dist: nnetsauce
|
|
27
|
+
Dynamic: author
|
|
28
|
+
Dynamic: author-email
|
|
29
|
+
Dynamic: classifier
|
|
30
|
+
Dynamic: description
|
|
31
|
+
Dynamic: home-page
|
|
32
|
+
Dynamic: keywords
|
|
33
|
+
Dynamic: license
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
Dynamic: requires-dist
|
|
36
|
+
Dynamic: requires-python
|
|
37
|
+
Dynamic: summary
|
|
28
38
|
|
|
29
39
|
Unified interface for Gradient Boosted Decision Trees
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|