unifiedbooster 0.1.2.tar.gz → 0.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- unifiedbooster-0.1.2/PKG-INFO
+++ unifiedbooster-0.2.0/PKG-INFO
@@ -1,7 +1,7 @@
  Metadata-Version: 2.1
  Name: unifiedbooster
- Version: 0.1.2
- Summary: Call R functions from Python
+ Version: 0.2.0
+ Summary: Unified interface for Gradient Boosted Decision Trees
  Home-page: https://github.com/thierrymoudiki/unifiedbooster
  Author: T. Moudiki
  Author-email: thierry.moudiki@gmail.com
@@ -17,9 +17,11 @@ Classifier: Programming Language :: Python :: 3.7
  Classifier: Programming Language :: Python :: 3.8
  Requires-Python: >=3.6
  Requires-Dist: Cython
+ Requires-Dist: numpy
  Requires-Dist: scikit-learn
  Requires-Dist: xgboost
  Requires-Dist: lightgbm
  Requires-Dist: catboost
+ Requires-Dist: GPopt

  Unified interface for Gradient Boosted Decision Trees
--- unifiedbooster-0.1.2/setup.py
+++ unifiedbooster-0.2.0/setup.py
@@ -10,7 +10,7 @@ from os import path

  subprocess.check_call(['pip', 'install', 'Cython'])

- __version__ = "0.1.2"
+ __version__ = "0.2.0"

  here = path.abspath(path.dirname(__file__))

@@ -44,7 +44,7 @@ setup(
      'Programming Language :: Python :: 3.7',
      'Programming Language :: Python :: 3.8',
  ],
- description="Call R functions from Python",
+ description="Unified interface for Gradient Boosted Decision Trees",
  entry_points={
      'console_scripts': [
          'unifiedbooster=unifiedbooster.cli:main',
--- /dev/null
+++ unifiedbooster-0.2.0/unifiedbooster/gbdt.py
@@ -0,0 +1,68 @@
+ import numpy as np
+ from sklearn.base import BaseEstimator
+
+
+ class GBDT(BaseEstimator):
+     def __init__(self,
+                  model_type='xgboost',
+                  n_estimators=100,
+                  learning_rate=0.1,
+                  max_depth=3,
+                  rowsample=1.0,
+                  colsample=1.0,
+                  verbose=0,
+                  seed=123,
+                  **kwargs):
+
+         self.model_type = model_type
+         self.n_estimators = n_estimators
+         self.learning_rate = learning_rate
+         self.max_depth = max_depth
+         self.rowsample = rowsample
+         self.colsample = colsample
+         self.verbose = verbose
+         self.seed = seed
+
+         if self.model_type == "xgboost":
+             self.params = {
+                 'n_estimators': self.n_estimators,
+                 'learning_rate': self.learning_rate,
+                 'subsample': self.rowsample,
+                 'colsample_bynode': self.colsample,
+                 'max_depth': self.max_depth,
+                 'verbosity': self.verbose,
+                 'seed': self.seed,
+                 **kwargs
+             }
+         elif self.model_type == "lightgbm":
+             verbose = self.verbose - 1 if self.verbose == 0 else self.verbose
+             self.params = {
+                 'n_estimators': self.n_estimators,
+                 'learning_rate': self.learning_rate,
+                 'subsample': self.rowsample,
+                 'feature_fraction_bynode': self.colsample,
+                 'max_depth': self.max_depth,
+                 'verbose': verbose,  # keep this way
+                 'seed': self.seed,
+                 **kwargs
+             }
+         elif self.model_type == "catboost":
+             self.params = {
+                 'iterations': self.n_estimators,
+                 'learning_rate': self.learning_rate,
+                 'subsample': self.rowsample,
+                 'rsm': self.colsample,
+                 'depth': self.max_depth,
+                 'verbose': self.verbose,
+                 'random_seed': self.seed,
+                 **kwargs
+             }
+
+     def fit(self, X, y, **kwargs):
+         if getattr(self, "type_fit") == "classification":
+             self.classes_ = np.unique(y)  # for compatibility with sklearn
+             self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
+         return getattr(self, "model").fit(X, y, **kwargs)
+
+     def predict(self, X):
+         return getattr(self, "model").predict(X)
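
The heart of the 0.2.0 refactor is this new shared `GBDT` base class, which translates one set of unified constructor arguments into each backend's native parameter names. A minimal sketch of that translation, assuming unifiedbooster 0.2.0 is installed and `GBDT` is importable from `unifiedbooster.gbdt` as this file's path suggests:

```python
# Sketch: inspect the per-backend parameter dicts built in GBDT.__init__.
# Assumes unifiedbooster 0.2.0 is installed.
from unifiedbooster.gbdt import GBDT

shared = dict(n_estimators=200, learning_rate=0.05, max_depth=4,
              rowsample=0.8, colsample=0.5, seed=42)

for backend in ("xgboost", "lightgbm", "catboost"):
    print(backend, "->", GBDT(model_type=backend, **shared).params)
# xgboost  maps rowsample -> 'subsample', colsample -> 'colsample_bynode'
# lightgbm maps rowsample -> 'subsample', colsample -> 'feature_fraction_bynode'
# catboost maps rowsample -> 'subsample', colsample -> 'rsm',
#          n_estimators -> 'iterations', max_depth -> 'depth'
```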
--- /dev/null
+++ unifiedbooster-0.2.0/unifiedbooster/gbdt_classification.py
@@ -0,0 +1,106 @@
+ from .gbdt import GBDT
+ from sklearn.base import ClassifierMixin
+ from xgboost import XGBClassifier
+ from catboost import CatBoostClassifier
+ from lightgbm import LGBMClassifier
+
+
+ class GBDTClassifier(GBDT, ClassifierMixin):
+     """GBDT Classification model
+
+     Attributes:
+
+         model_type: str
+             the gradient boosting backend: 'xgboost', 'lightgbm' or 'catboost'
+
+         n_estimators: int
+             maximum number of trees that can be built
+
+         learning_rate: float
+             shrinkage rate; used for reducing the gradient step
+
+         max_depth: int
+             maximum tree depth
+
+         rowsample: float
+             subsample ratio of the training instances
+
+         colsample: float
+             percentage of features to use at each node split
+
+         verbose: int
+             controls verbosity (default=0)
+
+         seed: int
+             reproducibility seed
+
+     Examples:
+
+         ```python
+         import unifiedbooster as ub
+         from sklearn.datasets import load_iris
+         from sklearn.model_selection import train_test_split
+         from sklearn.metrics import accuracy_score
+
+         # Load dataset
+         iris = load_iris()
+         X, y = iris.data, iris.target
+
+         # Split dataset into training and testing sets
+         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+         # Initialize the unified classifiers (XGBoost and LightGBM shown; CatBoost commented out)
+         classifier1 = ub.GBDTClassifier(model_type='xgboost')
+         #classifier2 = ub.GBDTClassifier(model_type='catboost')
+         classifier3 = ub.GBDTClassifier(model_type='lightgbm')
+
+         # Fit the models
+         classifier1.fit(X_train, y_train)
+         #classifier2.fit(X_train, y_train)
+         classifier3.fit(X_train, y_train)
+
+         # Predict on the test set
+         y_pred1 = classifier1.predict(X_test)
+         #y_pred2 = classifier2.predict(X_test)
+         y_pred3 = classifier3.predict(X_test)
+
+         # Evaluate the models
+         accuracy1 = accuracy_score(y_test, y_pred1)
+         #accuracy2 = accuracy_score(y_test, y_pred2)
+         accuracy3 = accuracy_score(y_test, y_pred3)
+         print(f"Classification Accuracy xgboost: {accuracy1:.2f}")
+         #print(f"Classification Accuracy catboost: {accuracy2:.2f}")
+         print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
+         ```
+     """
+
+     def __init__(self,
+                  model_type='xgboost',
+                  n_estimators=100,
+                  learning_rate=0.1,
+                  max_depth=3,
+                  rowsample=1.0,
+                  colsample=1.0,
+                  verbose=0,
+                  seed=123,
+                  **kwargs):
+
+         self.type_fit = "classification"
+
+         super().__init__(
+             model_type=model_type,
+             n_estimators=n_estimators,
+             learning_rate=learning_rate,
+             max_depth=max_depth,
+             rowsample=rowsample,
+             colsample=colsample,
+             verbose=verbose,
+             seed=seed,
+             **kwargs
+         )
+
+         if model_type == 'xgboost':
+             self.model = XGBClassifier(**self.params)
+         elif model_type == 'catboost':
+             self.model = CatBoostClassifier(**self.params)
+         elif model_type == 'lightgbm':
+             self.model = LGBMClassifier(**self.params)
+         else:
+             raise ValueError(f"Unknown model_type: {model_type}")
+
+     def predict_proba(self, X):
+         return self.model.predict_proba(X)
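
Because `GBDTClassifier` derives from scikit-learn's `BaseEstimator` and stores every named constructor argument unchanged, it should survive scikit-learn's `clone`, and `fit` now exposes `classes_`/`n_classes_`. A minimal cross-validation sketch, assuming unifiedbooster 0.2.0 is installed (note two caveats visible in the diff: `**kwargs` are not reported by `get_params`, and `set_params` alone would not rebuild the wrapped backend model, so a fresh estimator is constructed per configuration):

```python
# Sketch: cross-validate GBDTClassifier with scikit-learn tooling.
# Cloning relies on the named constructor arguments only.
import unifiedbooster as ub
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score

X, y = load_iris(return_X_y=True)

for depth in (2, 3, 4):
    # Build a fresh estimator per depth: set_params would update the attribute
    # but not rebuild the underlying XGBClassifier created in __init__.
    clf = ub.GBDTClassifier(model_type='xgboost', max_depth=depth)
    scores = cross_val_score(clf, X, y, cv=3)
    print(f"max_depth={depth}: mean accuracy {scores.mean():.3f}")
```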
--- /dev/null
+++ unifiedbooster-0.2.0/unifiedbooster/gbdt_regression.py
@@ -0,0 +1,103 @@
+ from .gbdt import GBDT
+ from sklearn.base import RegressorMixin
+ from xgboost import XGBRegressor
+ from catboost import CatBoostRegressor
+ from lightgbm import LGBMRegressor
+
+
+ class GBDTRegressor(GBDT, RegressorMixin):
+     """GBDT Regression model
+
+     Attributes:
+
+         model_type: str
+             the gradient boosting backend: 'xgboost', 'lightgbm' or 'catboost'
+
+         n_estimators: int
+             maximum number of trees that can be built
+
+         learning_rate: float
+             shrinkage rate; used for reducing the gradient step
+
+         max_depth: int
+             maximum tree depth
+
+         rowsample: float
+             subsample ratio of the training instances
+
+         colsample: float
+             percentage of features to use at each node split
+
+         verbose: int
+             controls verbosity (default=0)
+
+         seed: int
+             reproducibility seed
+
+     Examples:
+
+         ```python
+         import unifiedbooster as ub
+         from sklearn.datasets import fetch_california_housing
+         from sklearn.model_selection import train_test_split
+         from sklearn.metrics import mean_squared_error
+
+         # Load dataset
+         housing = fetch_california_housing()
+         X, y = housing.data, housing.target
+
+         # Split dataset into training and testing sets
+         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+         # Initialize the unified regressors (XGBoost and LightGBM shown; CatBoost commented out)
+         regressor1 = ub.GBDTRegressor(model_type='xgboost')
+         #regressor2 = ub.GBDTRegressor(model_type='catboost')
+         regressor3 = ub.GBDTRegressor(model_type='lightgbm')
+
+         # Fit the models
+         regressor1.fit(X_train, y_train)
+         #regressor2.fit(X_train, y_train)
+         regressor3.fit(X_train, y_train)
+
+         # Predict on the test set
+         y_pred1 = regressor1.predict(X_test)
+         #y_pred2 = regressor2.predict(X_test)
+         y_pred3 = regressor3.predict(X_test)
+
+         # Evaluate the models
+         mse1 = mean_squared_error(y_test, y_pred1)
+         #mse2 = mean_squared_error(y_test, y_pred2)
+         mse3 = mean_squared_error(y_test, y_pred3)
+         print(f"Regression Mean Squared Error xgboost: {mse1:.2f}")
+         #print(f"Regression Mean Squared Error catboost: {mse2:.2f}")
+         print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}")
+         ```
+     """
+
+     def __init__(self,
+                  model_type='xgboost',
+                  n_estimators=100,
+                  learning_rate=0.1,
+                  max_depth=3,
+                  rowsample=1.0,
+                  colsample=1.0,
+                  verbose=0,
+                  seed=123,
+                  **kwargs):
+
+         self.type_fit = "regression"
+
+         super().__init__(
+             model_type=model_type,
+             n_estimators=n_estimators,
+             learning_rate=learning_rate,
+             max_depth=max_depth,
+             rowsample=rowsample,
+             colsample=colsample,
+             verbose=verbose,
+             seed=seed,
+             **kwargs
+         )
+
+         if model_type == 'xgboost':
+             self.model = XGBRegressor(**self.params)
+         elif model_type == 'catboost':
+             self.model = CatBoostRegressor(**self.params)
+         elif model_type == 'lightgbm':
+             self.model = LGBMRegressor(**self.params)
+         else:
+             raise ValueError(f"Unknown model_type: {model_type}")
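
Both wrappers forward their trailing `**kwargs` into the backend parameter dict built by `GBDT.__init__`, so backend-specific options remain reachable through the unified interface. A small sketch, using XGBoost's real `tree_method` option as the pass-through example (assumes unifiedbooster 0.2.0):

```python
# Sketch: backend-specific options pass through **kwargs into self.params
# and from there into the wrapped estimator's constructor.
import unifiedbooster as ub

reg = ub.GBDTRegressor(model_type='xgboost', tree_method='hist')
print(reg.params['tree_method'])  # 'hist', forwarded verbatim to XGBRegressor
# Caveat: such kwargs are not returned by get_params(), so they do not
# survive scikit-learn's clone().
```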
--- unifiedbooster-0.1.2/unifiedbooster.egg-info/PKG-INFO
+++ unifiedbooster-0.2.0/unifiedbooster.egg-info/PKG-INFO
@@ -1,7 +1,7 @@
  Metadata-Version: 2.1
  Name: unifiedbooster
- Version: 0.1.2
- Summary: Call R functions from Python
+ Version: 0.2.0
+ Summary: Unified interface for Gradient Boosted Decision Trees
  Home-page: https://github.com/thierrymoudiki/unifiedbooster
  Author: T. Moudiki
  Author-email: thierry.moudiki@gmail.com
@@ -17,9 +17,11 @@ Classifier: Programming Language :: Python :: 3.7
  Classifier: Programming Language :: Python :: 3.8
  Requires-Python: >=3.6
  Requires-Dist: Cython
+ Requires-Dist: numpy
  Requires-Dist: scikit-learn
  Requires-Dist: xgboost
  Requires-Dist: lightgbm
  Requires-Dist: catboost
+ Requires-Dist: GPopt

  Unified interface for Gradient Boosted Decision Trees
--- unifiedbooster-0.1.2/unifiedbooster.egg-info/SOURCES.txt
+++ unifiedbooster-0.2.0/unifiedbooster.egg-info/SOURCES.txt
@@ -1,6 +1,7 @@
  README.md
  setup.py
  unifiedbooster/__init__.py
+ unifiedbooster/gbdt.py
  unifiedbooster/gbdt_classification.py
  unifiedbooster/gbdt_regression.py
  unifiedbooster.egg-info/PKG-INFO
--- unifiedbooster-0.1.2/unifiedbooster.egg-info/requires.txt
+++ unifiedbooster-0.2.0/unifiedbooster.egg-info/requires.txt
@@ -1,5 +1,7 @@
  Cython
+ numpy
  scikit-learn
  xgboost
  lightgbm
  catboost
+ GPopt
--- unifiedbooster-0.1.2/unifiedbooster/gbdt_classification.py
+++ /dev/null
@@ -1,81 +0,0 @@
- from sklearn.base import BaseEstimator, ClassifierMixin
- from xgboost import XGBClassifier
- from catboost import CatBoostClassifier
- from lightgbm import LGBMClassifier
-
-
- class GBDTClassifier(BaseEstimator, ClassifierMixin):
-     def __init__(self, model_type='xgboost',
-                  n_estimators=100,
-                  learning_rate=0.1,
-                  max_depth=3,
-                  subsample=1.0,
-                  verbosity=0,
-                  **kwargs):
-         self.model_type = model_type
-         self.n_estimators = n_estimators
-         self.learning_rate = learning_rate
-         self.max_depth = max_depth
-         self.subsample = subsample
-         self.verbosity = verbosity
-         # xgboost -----
-         # n_estimators
-         # learning_rate
-         # subsample
-         # max_depth
-         # lightgbm -----
-         # n_estimators
-         # learning_rate
-         # bagging_fraction
-         # max_depth
-         # catboost -----
-         # iterations
-         # learning_rate
-         # rsm
-         # depth
-         if self.model_type == "xgboost":
-             self.params = {
-                 'n_estimators': self.n_estimators,
-                 'learning_rate': self.learning_rate,
-                 'subsample': self.subsample,
-                 'max_depth': self.max_depth,
-                 'verbosity': self.verbosity,
-                 **kwargs
-             }
-         elif self.model_type == "lightgbm":
-             verbose = self.verbosity - 1 if self.verbosity == 0 else self.verbosity
-             self.params = {
-                 'n_estimators': self.n_estimators,
-                 'learning_rate': self.learning_rate,
-                 'bagging_fraction': self.subsample,
-                 'max_depth': self.max_depth,
-                 'verbose': verbose,
-                 **kwargs
-             }
-         elif self.model_type == "catboost":
-             self.params = {
-                 'iterations': self.n_estimators,
-                 'learning_rate': self.learning_rate,
-                 'rsm': self.subsample,
-                 'depth': self.max_depth,
-                 'verbose': self.verbosity,
-                 **kwargs
-             }
-
-         if model_type == 'xgboost':
-             self.model = XGBClassifier(**self.params)
-         elif model_type == 'catboost':
-             self.model = CatBoostClassifier(**self.params)
-         elif model_type == 'lightgbm':
-             self.model = LGBMClassifier(**self.params)
-         else:
-             raise ValueError(f"Unknown model_type: {model_type}")
-
-     def fit(self, X, y, **kwargs):
-         return self.model.fit(X, y, **kwargs)
-
-     def predict(self, X):
-         return self.model.predict(X)
-
-     def predict_proba(self, X):
-         return self.model.predict_proba(X)
--- unifiedbooster-0.1.2/unifiedbooster/gbdt_regression.py
+++ /dev/null
@@ -1,78 +0,0 @@
- from sklearn.base import BaseEstimator, RegressorMixin
- from xgboost import XGBRegressor
- from catboost import CatBoostRegressor
- from lightgbm import LGBMRegressor
-
-
- class GBDTRegressor(BaseEstimator, RegressorMixin):
-     def __init__(self, model_type='xgboost',
-                  n_estimators=100,
-                  learning_rate=0.1,
-                  max_depth=3,
-                  subsample=1.0,
-                  verbosity=0,
-                  **kwargs):
-         self.model_type = model_type
-         self.n_estimators = n_estimators
-         self.learning_rate = learning_rate
-         self.max_depth = max_depth
-         self.subsample = subsample
-         self.verbosity = verbosity
-         # xgboost -----
-         # n_estimators
-         # learning_rate
-         # subsample
-         # max_depth
-         # lightgbm -----
-         # n_estimators
-         # learning_rate
-         # bagging_fraction
-         # max_depth
-         # catboost -----
-         # iterations
-         # learning_rate
-         # rsm
-         # depth
-         if self.model_type == "xgboost":
-             self.params = {
-                 'n_estimators': self.n_estimators,
-                 'learning_rate': self.learning_rate,
-                 'subsample': self.subsample,
-                 'max_depth': self.max_depth,
-                 'verbosity': self.verbosity,
-                 **kwargs
-             }
-         elif self.model_type == "lightgbm":
-             verbose = self.verbosity - 1 if self.verbosity == 0 else self.verbosity
-             self.params = {
-                 'n_estimators': self.n_estimators,
-                 'learning_rate': self.learning_rate,
-                 'bagging_fraction': self.subsample,
-                 'max_depth': self.max_depth,
-                 'verbose': verbose,
-                 **kwargs
-             }
-         elif self.model_type == "catboost":
-             self.params = {
-                 'iterations': self.n_estimators,
-                 'learning_rate': self.learning_rate,
-                 'rsm': self.subsample,
-                 'depth': self.max_depth,
-                 'verbose': self.verbosity,
-                 **kwargs
-             }
-
-         if model_type == 'xgboost':
-             self.model = XGBRegressor(**self.params)
-         elif model_type == 'catboost':
-             self.model = CatBoostRegressor(**self.params)
-         elif model_type == 'lightgbm':
-             self.model = LGBMRegressor(**self.params)
-         else:
-             raise ValueError(f"Unknown model_type: {model_type}")
-
-     def fit(self, X, y, **kwargs):
-         return self.model.fit(X, y, **kwargs)
-
-     def predict(self, X):
-         return self.model.predict(X)
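
The removed 0.1.2 wrappers took `subsample` and `verbosity`; 0.2.0 renames them to `rowsample` and `verbose` and adds a separate `colsample` for per-split feature sampling. Note also that 0.1.2 mapped `subsample` to CatBoost's `rsm` (a feature-sampling ratio), whereas 0.2.0 maps `rowsample` to CatBoost's `subsample` and `colsample` to `rsm`. A hypothetical migration sketch:

```python
# Hypothetical migration from 0.1.2 to 0.2.0 (argument renames only).
import unifiedbooster as ub

# 0.1.2:
# clf = ub.GBDTClassifier(model_type='catboost', subsample=0.8, verbosity=1)

# 0.2.0: rowsample replaces subsample (rows); colsample is new (features per split)
clf = ub.GBDTClassifier(model_type='catboost', rowsample=0.8, colsample=0.8, verbose=1)
```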
2 files without changes