unifiedbooster 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
unifiedbooster/__init__.py ADDED
@@ -0,0 +1,12 @@
+ from .gbdt import GBDT
+ from .gbdt_classification import GBDTClassifier
+ from .gbdt_regression import GBDTRegressor
+ from .gpoptimization import cross_val_optim, lazy_cross_val_optim
+
+ __all__ = [
+     "GBDT",
+     "GBDTClassifier",
+     "GBDTRegressor",
+     "cross_val_optim",
+     "lazy_cross_val_optim",
+ ]
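For orientation: the package re-exports its entire public surface from the top-level module, so downstream code only needs the `unifiedbooster` namespace. A minimal sketch (names taken directly from the `__all__` list above):

```python
import unifiedbooster as ub

# everything exported by the package's top-level __init__
print(ub.__all__)
# ['GBDT', 'GBDTClassifier', 'GBDTRegressor',
#  'cross_val_optim', 'lazy_cross_val_optim']
```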
unifiedbooster/gbdt.py ADDED
@@ -0,0 +1,161 @@
+ import numpy as np
+ from sklearn.base import BaseEstimator
+
+
+ class GBDT(BaseEstimator):
+     """Gradient Boosted Decision Trees (GBDT) base class
+
+     Attributes:
+
+         model_type: str
+             type of gradient boosting algorithm: 'xgboost', 'lightgbm',
+             'catboost', 'gradientboosting'
+
+         n_estimators: int
+             maximum number of trees that can be built
+
+         learning_rate: float
+             shrinkage rate; used for reducing the gradient step
+
+         max_depth: int
+             maximum tree depth
+
+         rowsample: float
+             subsample ratio of the training instances
+
+         colsample: float
+             proportion of features to use at each node split
+
+         level: float
+             confidence level for prediction intervals (regression) or
+             prediction sets (classification); None disables conformal
+             prediction
+
+         pi_method: str
+             method for constructing the prediction intervals or sets;
+             see GBDTRegressor and GBDTClassifier for accepted values
+
+         verbose: int
+             controls verbosity (default=0)
+
+         seed: int
+             reproducibility seed
+
+         **kwargs: dict
+             additional parameters to be passed to the class
+     """
+
+     def __init__(
+         self,
+         model_type="xgboost",
+         n_estimators=100,
+         learning_rate=0.1,
+         max_depth=3,
+         rowsample=1.0,
+         colsample=1.0,
+         level=None,
+         pi_method=None,
+         verbose=0,
+         seed=123,
+         **kwargs
+     ):
+
+         self.model_type = model_type
+         self.n_estimators = n_estimators
+         self.learning_rate = learning_rate
+         self.max_depth = max_depth
+         self.rowsample = rowsample
+         self.colsample = colsample
+         self.level = level
+         self.pi_method = pi_method
+         self.verbose = verbose
+         self.seed = seed
+
+         if self.model_type == "xgboost":
+             self.params = {
+                 "n_estimators": self.n_estimators,
+                 "learning_rate": self.learning_rate,
+                 "subsample": self.rowsample,
+                 "colsample_bynode": self.colsample,
+                 "max_depth": self.max_depth,
+                 "verbosity": self.verbose,
+                 "seed": self.seed,
+                 **kwargs,
+             }
+         elif self.model_type == "lightgbm":
+             verbose = self.verbose - 1 if self.verbose == 0 else self.verbose
+             self.params = {
+                 "n_estimators": self.n_estimators,
+                 "learning_rate": self.learning_rate,
+                 "subsample": self.rowsample,
+                 "feature_fraction_bynode": self.colsample,
+                 "max_depth": self.max_depth,
+                 "verbose": verbose,  # keep this way
+                 "seed": self.seed,
+                 **kwargs,
+             }
+         elif self.model_type == "catboost":
+             self.params = {
+                 "iterations": self.n_estimators,
+                 "learning_rate": self.learning_rate,
+                 "subsample": self.rowsample,
+                 "rsm": self.colsample,
+                 "depth": self.max_depth,
+                 "verbose": self.verbose,
+                 "random_seed": self.seed,
+                 "boosting_type": "Plain",
+                 "leaf_estimation_iterations": 1,
+                 "bootstrap_type": "Bernoulli",
+                 **kwargs,
+             }
+         elif self.model_type == "gradientboosting":
+             self.params = {
+                 "n_estimators": self.n_estimators,
+                 "learning_rate": self.learning_rate,
+                 "subsample": self.rowsample,
+                 "max_features": self.colsample,
+                 "max_depth": self.max_depth,
+                 "verbose": self.verbose,
+                 "random_state": self.seed,
+                 **kwargs,
+             }
+
+     def fit(self, X, y, **kwargs):
+         """Fit the selected model to training data (X, y).
+
+         Parameters:
+
+             X: {array-like}, shape = [n_samples, n_features]
+                 Training vectors, where n_samples is the number
+                 of samples and n_features is the number of features.
+
+             y: array-like, shape = [n_samples]
+                 Target values.
+
+             **kwargs: additional parameters to be passed to the
+                 underlying model's fit method
+
+         Returns:
+
+             self: object
+         """
+         if self.type_fit == "classification":
+             # for compatibility with sklearn
+             self.classes_ = np.unique(y)
+             self.n_classes_ = len(self.classes_)
+         if self.model_type == "gradientboosting":
+             # scikit-learn's max_features expects an absolute count here,
+             # so convert the colsample fraction into a number of features
+             self.model.max_features = int(self.model.max_features * X.shape[1])
+         return self.model.fit(X, y, **kwargs)
+
+     def predict(self, X):
+         """Predict test data X.
+
+         Parameters:
+
+             X: {array-like}, shape = [n_samples, n_features]
+                 Test vectors, where n_samples is the number
+                 of samples and n_features is the number of features.
+
+         Returns:
+
+             model predictions: {array-like}
+         """
+         if self.level is not None and self.type_fit == "regression":
+             # conformalized regressor: also return the prediction intervals
+             return self.model.predict(X, return_pi=True)
+         else:
+             return self.model.predict(X)
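The base class does pure translation: each unified hyperparameter (`rowsample`, `colsample`, ...) is mapped onto the backend's native parameter name at construction time and stored in `self.params`. A minimal sketch of inspecting that mapping; since `GBDT.__init__` only builds the dict, no boosting backend needs to be installed for this:

```python
import unifiedbooster as ub

# rowsample/colsample become xgboost's subsample/colsample_bynode
model = ub.GBDT(model_type="xgboost", rowsample=0.8, colsample=0.5)
print(model.params["subsample"])         # 0.8
print(model.params["colsample_bynode"])  # 0.5

# the same knobs map to catboost's subsample/rsm
model = ub.GBDT(model_type="catboost", rowsample=0.8, colsample=0.5)
print(model.params["subsample"])  # 0.8
print(model.params["rsm"])        # 0.5
```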
unifiedbooster/gbdt_classification.py ADDED
@@ -0,0 +1,188 @@
+ from .gbdt import GBDT
+ from sklearn.base import ClassifierMixin
+ from .predictionset import PredictionSet
+
+ try:
+     from xgboost import XGBClassifier
+ except ImportError:
+     pass
+ try:
+     from catboost import CatBoostClassifier
+ except ImportError:
+     pass
+ try:
+     from lightgbm import LGBMClassifier
+ except ImportError:
+     pass
+ from sklearn.ensemble import GradientBoostingClassifier
+
+
+ class GBDTClassifier(GBDT, ClassifierMixin):
+     """GBDT Classification model
+
+     Attributes:
+
+         model_type: str
+             type of gradient boosting algorithm: 'xgboost', 'lightgbm',
+             'catboost', 'gradientboosting'
+
+         n_estimators: int
+             maximum number of trees that can be built
+
+         learning_rate: float
+             shrinkage rate; used for reducing the gradient step
+
+         max_depth: int
+             maximum tree depth
+
+         rowsample: float
+             subsample ratio of the training instances
+
+         colsample: float
+             proportion of features to use at each node split
+
+         level: float
+             confidence level for prediction sets
+
+         pi_method: str
+             method for constructing the prediction sets: 'icp'
+             (inductive conformal), 'tcp' (transductive conformal)
+
+         verbose: int
+             controls verbosity (default=0)
+
+         seed: int
+             reproducibility seed
+
+         **kwargs: dict
+             additional parameters to be passed to the class
+
+     Examples:
+
+         ```python
+         import unifiedbooster as ub
+         from sklearn.datasets import load_iris
+         from sklearn.model_selection import train_test_split
+         from sklearn.metrics import accuracy_score
+
+         # Load dataset
+         iris = load_iris()
+         X, y = iris.data, iris.target
+
+         # Split dataset into training and testing sets
+         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+         # Initialize the unified classifiers (example with XGBoost and LightGBM)
+         clf1 = ub.GBDTClassifier(model_type='xgboost')
+         #clf2 = ub.GBDTClassifier(model_type='catboost')
+         clf3 = ub.GBDTClassifier(model_type='lightgbm')
+
+         # Fit the models
+         clf1.fit(X_train, y_train)
+         #clf2.fit(X_train, y_train)
+         clf3.fit(X_train, y_train)
+
+         # Predict on the test set
+         y_pred1 = clf1.predict(X_test)
+         #y_pred2 = clf2.predict(X_test)
+         y_pred3 = clf3.predict(X_test)
+
+         # Evaluate the models
+         accuracy1 = accuracy_score(y_test, y_pred1)
+         #accuracy2 = accuracy_score(y_test, y_pred2)
+         accuracy3 = accuracy_score(y_test, y_pred3)
+         print(f"Classification Accuracy xgboost: {accuracy1:.2f}")
+         #print(f"Classification Accuracy catboost: {accuracy2:.2f}")
+         print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
+         ```
+     """
+
+     def __init__(
+         self,
+         model_type="xgboost",
+         n_estimators=100,
+         learning_rate=0.1,
+         max_depth=3,
+         rowsample=1.0,
+         colsample=1.0,
+         level=None,
+         pi_method="icp",
+         verbose=0,
+         seed=123,
+         **kwargs,
+     ):
+
+         self.type_fit = "classification"
+
+         super().__init__(
+             model_type=model_type,
+             n_estimators=n_estimators,
+             learning_rate=learning_rate,
+             max_depth=max_depth,
+             rowsample=rowsample,
+             colsample=colsample,
+             level=level,
+             pi_method=pi_method,
+             verbose=verbose,
+             seed=seed,
+             **kwargs,
+         )
+
+         if self.level is not None:
+
+             if model_type in ("xgboost", "xgb"):
+                 self.model = PredictionSet(
+                     XGBClassifier(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                 )
+             elif model_type in ("catboost", "cb"):
+                 self.model = PredictionSet(
+                     CatBoostClassifier(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                 )
+             elif model_type in ("lightgbm", "lgb"):
+                 self.model = PredictionSet(
+                     LGBMClassifier(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                 )
+             elif model_type in ("gradientboosting", "gb"):
+                 self.model = PredictionSet(
+                     GradientBoostingClassifier(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                 )
+             else:
+                 raise ValueError(f"Unknown model_type: {model_type}")
+
+         else:
+
+             if model_type in ("xgboost", "xgb"):
+                 self.model = XGBClassifier(**self.params)
+             elif model_type in ("catboost", "cb"):
+                 self.model = CatBoostClassifier(**self.params)
+             elif model_type in ("lightgbm", "lgb"):
+                 self.model = LGBMClassifier(**self.params)
+             elif model_type in ("gradientboosting", "gb"):
+                 self.model = GradientBoostingClassifier(**self.params)
+             else:
+                 raise ValueError(f"Unknown model_type: {model_type}")
+
+     def predict_proba(self, X):
+         """Predict class probabilities for test data X.
+
+         Args:
+
+             X: {array-like}, shape = [n_samples, n_features]
+                 Test vectors, where n_samples is the number
+                 of samples and n_features is the number of features.
+
+         Returns:
+
+             probability estimates for test data: {array-like}
+         """
+         return self.model.predict_proba(X)
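Worth noting: when `level` is set, `predict` goes through the `PredictionSet` wrapper rather than the raw backend. A minimal usage sketch, assuming `level` is expressed as a percentage (e.g. 95); the exact structure of the returned prediction sets is defined in `predictionset.py`, which is not part of this diff:

```python
import unifiedbooster as ub
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# With level set, self.model is a PredictionSet wrapping the backend,
# so predict() yields conformal prediction sets rather than point labels
clf = ub.GBDTClassifier(model_type="lightgbm", level=95, pi_method="icp")
clf.fit(X_train, y_train)
pred_sets = clf.predict(X_test)
```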
unifiedbooster/gbdt_regression.py ADDED
@@ -0,0 +1,180 @@
+ from .gbdt import GBDT
+ from sklearn.base import RegressorMixin
+ from .predictioninterval import PredictionInterval
+
+ try:
+     from xgboost import XGBRegressor
+ except ImportError:
+     pass
+ try:
+     from catboost import CatBoostRegressor
+ except ImportError:
+     pass
+ try:
+     from lightgbm import LGBMRegressor
+ except ImportError:
+     pass
+ from sklearn.ensemble import GradientBoostingRegressor
+
+
+ class GBDTRegressor(GBDT, RegressorMixin):
+     """GBDT Regression model
+
+     Attributes:
+
+         model_type: str
+             type of gradient boosting algorithm: 'xgboost', 'lightgbm',
+             'catboost', 'gradientboosting'
+
+         n_estimators: int
+             maximum number of trees that can be built
+
+         learning_rate: float
+             shrinkage rate; used for reducing the gradient step
+
+         max_depth: int
+             maximum tree depth
+
+         rowsample: float
+             subsample ratio of the training instances
+
+         colsample: float
+             proportion of features to use at each node split
+
+         level: float
+             confidence level for prediction intervals
+
+         pi_method: str
+             method for constructing the prediction intervals:
+             'splitconformal', 'localconformal'
+
+         type_split: str
+             only used if `level` is not None; "random" (random split
+             of data) or "sequential" (sequential split of data)
+
+         verbose: int
+             controls verbosity (default=0)
+
+         seed: int
+             reproducibility seed
+
+         **kwargs: dict
+             additional parameters to be passed to the class
+
+     Examples:
+
+         ```python
+         import unifiedbooster as ub
+         from sklearn.datasets import fetch_california_housing
+         from sklearn.model_selection import train_test_split
+         from sklearn.metrics import mean_squared_error
+
+         # Load dataset
+         housing = fetch_california_housing()
+         X, y = housing.data, housing.target
+
+         # Split dataset into training and testing sets
+         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+         # Initialize the unified regressors (example with XGBoost and LightGBM)
+         regressor1 = ub.GBDTRegressor(model_type='xgboost')
+         #regressor2 = ub.GBDTRegressor(model_type='catboost')
+         regressor3 = ub.GBDTRegressor(model_type='lightgbm')
+
+         # Fit the models
+         regressor1.fit(X_train, y_train)
+         #regressor2.fit(X_train, y_train)
+         regressor3.fit(X_train, y_train)
+
+         # Predict on the test set
+         y_pred1 = regressor1.predict(X_test)
+         #y_pred2 = regressor2.predict(X_test)
+         y_pred3 = regressor3.predict(X_test)
+
+         # Evaluate the models
+         mse1 = mean_squared_error(y_test, y_pred1)
+         #mse2 = mean_squared_error(y_test, y_pred2)
+         mse3 = mean_squared_error(y_test, y_pred3)
+         print(f"Regression Mean Squared Error xgboost: {mse1:.2f}")
+         #print(f"Regression Mean Squared Error catboost: {mse2:.2f}")
+         print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}")
+         ```
+     """
+
+     def __init__(
+         self,
+         model_type="xgboost",
+         n_estimators=100,
+         learning_rate=0.1,
+         max_depth=3,
+         rowsample=1.0,
+         colsample=1.0,
+         level=None,
+         pi_method="splitconformal",
+         type_split="random",
+         verbose=0,
+         seed=123,
+         **kwargs,
+     ):
+
+         self.type_fit = "regression"
+         self.type_split = type_split
+
+         super().__init__(
+             model_type=model_type,
+             n_estimators=n_estimators,
+             learning_rate=learning_rate,
+             max_depth=max_depth,
+             rowsample=rowsample,
+             colsample=colsample,
+             level=level,
+             pi_method=pi_method,
+             verbose=verbose,
+             seed=seed,
+             **kwargs,
+         )
+
+         if self.level is not None:
+
+             if model_type in ("xgboost", "xgb"):
+                 self.model = PredictionInterval(
+                     XGBRegressor(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                     type_split=self.type_split,
+                 )
+             elif model_type in ("catboost", "cb"):
+                 self.model = PredictionInterval(
+                     CatBoostRegressor(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                     type_split=self.type_split,
+                 )
+             elif model_type in ("lightgbm", "lgb"):
+                 self.model = PredictionInterval(
+                     LGBMRegressor(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                     type_split=self.type_split,
+                 )
+             elif model_type in ("gradientboosting", "gb"):
+                 self.model = PredictionInterval(
+                     GradientBoostingRegressor(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                     type_split=self.type_split,
+                 )
+             else:
+                 raise ValueError(f"Unknown model_type: {model_type}")
+
+         else:
+
+             if model_type in ("xgboost", "xgb"):
+                 self.model = XGBRegressor(**self.params)
+             elif model_type in ("catboost", "cb"):
+                 self.model = CatBoostRegressor(**self.params)
+             elif model_type in ("lightgbm", "lgb"):
+                 self.model = LGBMRegressor(**self.params)
+             elif model_type in ("gradientboosting", "gb"):
+                 self.model = GradientBoostingRegressor(**self.params)
+             else:
+                 raise ValueError(f"Unknown model_type: {model_type}")
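When `level` is not None, the regressor wires the backend through `PredictionInterval`, and the base class's `predict` takes the `self.model.predict(X, return_pi=True)` branch. A minimal usage sketch, assuming `level` is a percentage (e.g. 95); the exact return structure of `return_pi=True` is defined in `predictioninterval.py`, which is not part of this diff:

```python
import unifiedbooster as ub
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# With level set, self.model is a PredictionInterval wrapper, so
# predict() returns point predictions plus interval information
reg = ub.GBDTRegressor(model_type="xgboost", level=95, pi_method="splitconformal")
reg.fit(X_train, y_train)
preds = reg.predict(X_test)
```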