PyPI - unifiedbooster - Versions diffs - 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl - Mend

unifiedbooster 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

unifiedbooster/gbdt.py +9 -5
unifiedbooster/gbdt_classification.py +43 -11
unifiedbooster/gbdt_regression.py +45 -12
unifiedbooster/gpoptimization.py +126 -61
unifiedbooster/nonconformist/__init__.py +36 -0
unifiedbooster/nonconformist/acp.py +381 -0
unifiedbooster/nonconformist/base.py +156 -0
unifiedbooster/nonconformist/cp.py +172 -0
unifiedbooster/nonconformist/evaluation.py +486 -0
unifiedbooster/nonconformist/icp.py +442 -0
unifiedbooster/nonconformist/nc.py +610 -0
unifiedbooster/nonconformist/util.py +9 -0
unifiedbooster/predictioninterval/__init__.py +3 -0
unifiedbooster/predictioninterval/predictioninterval.py +314 -0
unifiedbooster/predictionset/__init__.py +3 -0
unifiedbooster/predictionset/predictionset.py +111 -0
{unifiedbooster-0.5.0.dist-info → unifiedbooster-0.7.0.dist-info}/METADATA +1 -1
unifiedbooster-0.7.0.dist-info/RECORD +23 -0
{unifiedbooster-0.5.0.dist-info → unifiedbooster-0.7.0.dist-info}/WHEEL +1 -1
unifiedbooster-0.5.0.dist-info/RECORD +0 -11
{unifiedbooster-0.5.0.dist-info → unifiedbooster-0.7.0.dist-info}/LICENSE +0 -0
{unifiedbooster-0.5.0.dist-info → unifiedbooster-0.7.0.dist-info}/entry_points.txt +0 -0
{unifiedbooster-0.5.0.dist-info → unifiedbooster-0.7.0.dist-info}/top_level.txt +0 -0

unifiedbooster/gbdt.py CHANGED Viewed

@@ -35,7 +35,6 @@ class GBDT(BaseEstimator):
         **kwargs: dict
             additional parameters to be passed to the class
     """
     def __init__(
         self,
         model_type="xgboost",
@@ -44,6 +43,8 @@ class GBDT(BaseEstimator):
         max_depth=3,
         rowsample=1.0,
         colsample=1.0,
+        level=None,
+        pi_method=None,
         verbose=0,
         seed=123,
         **kwargs
@@ -55,6 +56,8 @@ class GBDT(BaseEstimator):
         self.max_depth = max_depth
         self.rowsample = rowsample
         self.colsample = colsample
+        self.level = level
+        self.pi_method = pi_method
         self.verbose = verbose
         self.seed = seed
@@ -91,7 +94,7 @@ class GBDT(BaseEstimator):
                 "verbose": self.verbose,
                 "random_seed": self.seed,
                 "boosting_type": "Plain",
-                "leaf_estimation_iterations": 1,
+                "leaf_estimation_iterations": 1,
                 "bootstrap_type": "Bernoulli",
                 **kwargs,
             }
@@ -126,7 +129,6 @@ class GBDT(BaseEstimator):
             self: object
         """
         if getattr(self, "type_fit") == "classification":
             self.classes_ = np.unique(y)  # for compatibility with sklearn
             self.n_classes_ = len(
@@ -152,5 +154,7 @@ class GBDT(BaseEstimator):
             model predictions: {array-like}
         """
-        return getattr(self, "model").predict(X)
+        if self.level is not None and self.type_fit == "regression":
+            return getattr(self, "model").predict(X, return_pi=True)
+        else:
+            return getattr(self, "model").predict(X)

unifiedbooster/gbdt_classification.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from .gbdt import GBDT
 from sklearn.base import ClassifierMixin
+from .predictionset import PredictionSet
 try:
     from xgboost import XGBClassifier
@@ -39,6 +40,12 @@ class GBDTClassifier(GBDT, ClassifierMixin):
         colsample: float
             percentage of features to use at each node split
+        level: float
+            confidence level for prediction sets
+        pi_method: str
+            method for constructing the prediction sets: 'icp' (inductive conformal), 'tcp' (transductive conformal)
         verbose: int
             controls verbosity (default=0)
@@ -88,7 +95,6 @@ class GBDTClassifier(GBDT, ClassifierMixin):
         print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
         ```
     """
     def __init__(
         self,
         model_type="xgboost",
@@ -97,6 +103,8 @@ class GBDTClassifier(GBDT, ClassifierMixin):
         max_depth=3,
         rowsample=1.0,
         colsample=1.0,
+        level=None,
+        pi_method="icp",
         verbose=0,
         seed=123,
         **kwargs,
@@ -111,21 +119,46 @@ class GBDTClassifier(GBDT, ClassifierMixin):
             max_depth=max_depth,
             rowsample=rowsample,
             colsample=colsample,
+            level=level,
+            pi_method=pi_method,
             verbose=verbose,
             seed=seed,
             **kwargs,
         )
-        if model_type == "xgboost":
-            self.model = XGBClassifier(**self.params)
-        elif model_type == "catboost":
-            self.model = CatBoostClassifier(**self.params)
-        elif model_type == "lightgbm":
-            self.model = LGBMClassifier(**self.params)
-        elif model_type == "gradientboosting":
-            self.model = GradientBoostingClassifier(**self.params)
+        if self.level is not None:
+            if model_type == "xgboost":
+                self.model = PredictionSet(XGBClassifier(**self.params),
+                                            level=self.level,
+                                            method=self.pi_method)
+            elif model_type == "catboost":
+                self.model = PredictionSet(CatBoostClassifier(**self.params),
+                                            level=self.level,
+                                            method=self.pi_method)
+            elif model_type == "lightgbm":
+                self.model = PredictionSet(LGBMClassifier(**self.params),
+                                            level=self.level,
+                                            method=self.pi_method)
+            elif model_type == "gradientboosting":
+                self.model = PredictionSet(GradientBoostingClassifier(**self.params),
+                                            level=self.level,
+                                            method=self.pi_method)
+            else:
+                raise ValueError(f"Unknown model_type: {model_type}")
         else:
-            raise ValueError(f"Unknown model_type: {model_type}")
+            if model_type == "xgboost":
+                self.model = XGBClassifier(**self.params)
+            elif model_type == "catboost":
+                self.model = CatBoostClassifier(**self.params)
+            elif model_type == "lightgbm":
+                self.model = LGBMClassifier(**self.params)
+            elif model_type == "gradientboosting":
+                self.model = GradientBoostingClassifier(**self.params)
+            else:
+                raise ValueError(f"Unknown model_type: {model_type}")
     def predict_proba(self, X):
         """Predict probabilities for test data X.
@@ -143,5 +176,4 @@ class GBDTClassifier(GBDT, ClassifierMixin):
             probability estimates for test data: {array-like}
         """
         return self.model.predict_proba(X)

unifiedbooster/gbdt_regression.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from .gbdt import GBDT
 from sklearn.base import RegressorMixin
+from .predictioninterval import PredictionInterval
 try:
     from xgboost import XGBRegressor
@@ -39,6 +40,12 @@ class GBDTRegressor(GBDT, RegressorMixin):
         colsample: float
             percentage of features to use at each node split
+        level: float
+            confidence level for prediction intervals
+        pi_method: str
+            method for constructing the prediction intervals: 'splitconformal', 'localconformal'
         verbose: int
             controls verbosity (default=0)
@@ -88,7 +95,6 @@ class GBDTRegressor(GBDT, RegressorMixin):
         print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}")
         ```
     """
     def __init__(
         self,
         model_type="xgboost",
@@ -97,12 +103,14 @@ class GBDTRegressor(GBDT, RegressorMixin):
         max_depth=3,
         rowsample=1.0,
         colsample=1.0,
+        level=None,
+        pi_method="splitconformal",
         verbose=0,
         seed=123,
         **kwargs,
     ):
-        self.type_fit = "regression"
+        self.type_fit = "regression"
         super().__init__(
             model_type=model_type,
@@ -111,18 +119,43 @@ class GBDTRegressor(GBDT, RegressorMixin):
             max_depth=max_depth,
             rowsample=rowsample,
             colsample=colsample,
+            level=level,
+            pi_method=pi_method,
             verbose=verbose,
             seed=seed,
             **kwargs,
         )
-        if model_type == "xgboost":
-            self.model = XGBRegressor(**self.params)
-        elif model_type == "catboost":
-            self.model = CatBoostRegressor(**self.params)
-        elif model_type == "lightgbm":
-            self.model = LGBMRegressor(**self.params)
-        elif model_type == "gradientboosting":
-            self.model = GradientBoostingRegressor(**self.params)
-        else:
-            raise ValueError(f"Unknown model_type: {model_type}")
+        if self.level is not None:
+            if model_type == "xgboost":
+                self.model = PredictionInterval(XGBRegressor(**self.params),
+                                                level=self.level,
+                                                method=self.pi_method)
+            elif model_type == "catboost":
+                self.model = PredictionInterval(CatBoostRegressor(**self.params),
+                                                level=self.level,
+                                                method=self.pi_method)
+            elif model_type == "lightgbm":
+                self.model = PredictionInterval(LGBMRegressor(**self.params),
+                                                level=self.level,
+                                                method=self.pi_method)
+            elif model_type == "gradientboosting":
+                self.model = PredictionInterval(GradientBoostingRegressor(**self.params),
+                                                level=self.level,
+                                                method=self.pi_method)
+            else:
+                raise ValueError(f"Unknown model_type: {model_type}")
+        else:
+            if model_type == "xgboost":
+                self.model = XGBRegressor(**self.params)
+            elif model_type == "catboost":
+                self.model = CatBoostRegressor(**self.params)
+            elif model_type == "lightgbm":
+                self.model = LGBMRegressor(**self.params)
+            elif model_type == "gradientboosting":
+                self.model = GradientBoostingRegressor(**self.params)
+            else:
+                raise ValueError(f"Unknown model_type: {model_type}")

unifiedbooster/gpoptimization.py CHANGED Viewed

@@ -18,7 +18,7 @@ def cross_val_optim(
     model_type="xgboost",
     type_fit="classification",
     scoring="accuracy",
-    n_estimators=100,
+    n_estimators=None,
     surrogate_obj=None,
     cv=5,
     n_jobs=None,
@@ -59,7 +59,7 @@ def cross_val_optim(
             scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
         n_estimators: int
-            maximum number of trees that can be built
+            maximum number of trees that can be built (default is None, and if None, then the parameter is tuned)
         surrogate_obj: an object;
             An ML model for estimating the uncertainty around the objective function
@@ -168,63 +168,128 @@ def cross_val_optim(
         ).mean()
     # objective function for hyperparams tuning
-    def crossval_objective(xx):
-        return gbdt_cv(
-            X_train=X_train,
-            y_train=y_train,
-            model_type=model_type,
-            n_estimators=n_estimators,
-            learning_rate=10 ** xx[0],
-            max_depth=int(xx[1]),
-            rowsample=xx[2],
-            colsample=xx[3],
-            cv=cv,
-            n_jobs=n_jobs,
-            type_fit=type_fit,
-            scoring=scoring,
-            seed=seed,
-        )
+    if n_estimators is not None:
-    if surrogate_obj is None:
-        gp_opt = gp.GPOpt(
-            objective_func=crossval_objective,
-            lower_bound=np.array([-6, 1, 0.5, 0.5]),
-            upper_bound=np.array([0, 16, 1.0, 1.0]),
-            params_names=[
-                "learning_rate",
-                "max_depth",
-                "rowsample",
-                "colsample",
-            ],
-            method="bayesian",
-            n_init=n_init,
-            n_iter=n_iter,
-            seed=seed,
-        )
-    else:
-        gp_opt = gp.GPOpt(
-            objective_func=crossval_objective,
-            lower_bound=np.array([-6, 1, 0.5, 0.5]),
-            upper_bound=np.array([0, 16, 1.0, 1.0]),
-            params_names=[
-                "learning_rate",
-                "max_depth",
-                "rowsample",
-                "colsample",
-            ],
-            acquisition="ucb",
-            method="splitconformal",
-            surrogate_obj=ns.PredictionInterval(
-                obj=surrogate_obj, method="splitconformal"
-            ),
-            n_init=n_init,
-            n_iter=n_iter,
-            seed=seed,
-        )
+        def crossval_objective(xx):
+            return gbdt_cv(
+                X_train=X_train,
+                y_train=y_train,
+                model_type=model_type,
+                n_estimators=n_estimators,
+                learning_rate=10 ** xx[0],
+                max_depth=int(xx[1]),
+                rowsample=xx[2],
+                colsample=xx[3],
+                cv=cv,
+                n_jobs=n_jobs,
+                type_fit=type_fit,
+                scoring=scoring,
+                seed=seed,
+            )
+    else:  # n_estimators is None
+        def crossval_objective(xx):
+            return gbdt_cv(
+                X_train=X_train,
+                y_train=y_train,
+                model_type=model_type,
+                n_estimators=int(10 ** xx[4]),
+                learning_rate=10 ** xx[0],
+                max_depth=int(xx[1]),
+                rowsample=xx[2],
+                colsample=xx[3],
+                cv=cv,
+                n_jobs=n_jobs,
+                type_fit=type_fit,
+                scoring=scoring,
+                seed=seed,
+            )
+    if n_estimators is not None:
+        if surrogate_obj is None:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5]),
+                upper_bound=np.array([0, 16, 1.0, 1.0]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                ],
+                method="bayesian",
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
+        else:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5]),
+                upper_bound=np.array([0, 16, 1.0, 1.0]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                ],
+                acquisition="ucb",
+                method="splitconformal",
+                surrogate_obj=ns.PredictionInterval(
+                    obj=surrogate_obj, method="splitconformal"
+                ),
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
+    else:  # n_estimators is None
+        if surrogate_obj is None:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5, 2]),
+                upper_bound=np.array([0, 16, 1.0, 1.0, 3]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                    "n_estimators",
+                ],
+                method="bayesian",
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
+        else:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5, 2]),
+                upper_bound=np.array([0, 16, 1.0, 1.0, 3]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                    "n_estimators",
+                ],
+                acquisition="ucb",
+                method="splitconformal",
+                surrogate_obj=ns.PredictionInterval(
+                    obj=surrogate_obj, method="splitconformal"
+                ),
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
     res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
     res.best_params["model_type"] = model_type
-    res.best_params["n_estimators"] = int(n_estimators)
+    res.best_params["n_estimators"] = (
+        int(n_estimators)
+        if n_estimators is not None
+        else int(10 ** res.best_params["n_estimators"])
+    )
     res.best_params["learning_rate"] = 10 ** res.best_params["learning_rate"]
     res.best_params["max_depth"] = int(res.best_params["max_depth"])
     res.best_params["rowsample"] = res.best_params["rowsample"]
@@ -256,7 +321,7 @@ def lazy_cross_val_optim(
     type_fit="classification",
     scoring="accuracy",
     customize=False,
-    n_estimators=100,
+    n_estimators=None,
     cv=5,
     n_jobs=None,
     n_init=10,
@@ -297,9 +362,9 @@ def lazy_cross_val_optim(
         customize: boolean
             if True, the surrogate is transformed into a quasi-randomized network (default is False)
         n_estimators: int
-            maximum number of trees that can be built
+            maximum number of trees that can be built (default is None; if None, the parameter is tuned)
         cv: int;
             number of cross-validation folds
@@ -325,7 +390,7 @@ def lazy_cross_val_optim(
     Examples:
         ```python
-        import os
+        import os
         import unifiedbooster as ub
         from sklearn.datasets import load_breast_cancer
         from sklearn.model_selection import train_test_split
@@ -396,7 +461,7 @@ def lazy_cross_val_optim(
                 if customize == True:
                     print(f"\n surrogate: CustomRegressor({est[0]})")
                     surr_obj = ns.CustomRegressor(obj=est[1]())
-                else:
+                else:
                     print(f"\n surrogate: {est[0]}")
                     surr_obj = est[1]()
                 res = cross_val_optim(
@@ -421,7 +486,7 @@ def lazy_cross_val_optim(
                 if customize == True:
                     results.append((f"CustomRegressor({est[0]})", res))
                 else:
-                    results.append((est[0], res))
+                    results.append((est[0], res))
             except:
                 pass

unifiedbooster/nonconformist/__init__.py ADDED Viewed

@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+"""
+docstring
+"""
+# Authors: Henrik Linusson
+# Yaniv Romano modified nc.py file to include CQR
+# T. Moudiki modified __init__.py to import classes
+# __version__ = '2.1.0'
+from .nc import (
+    AbsErrorErrFunc,
+    QuantileRegErrFunc,
+    RegressorNc,
+    RegressorNormalizer,
+)
+from .cp import IcpRegressor, TcpClassifier
+from .icp import IcpClassifier
+from .nc import ClassifierNc, MarginErrFunc
+from .base import RegressorAdapter, ClassifierAdapter
+__all__ = [
+    "AbsErrorErrFunc",
+    "MarginErrFunc",
+    "QuantileRegErrFunc",
+    "RegressorAdapter",
+    "ClassifierAdapter",
+    "RegressorNc",
+    "ClassifierNc",
+    "RegressorNormalizer",
+    "IcpRegressor",
+    "IcpClassifier",
+    "TcpClassifier"
+]

unifiedbooster 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

unifiedbooster 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl