unifiedbooster 0.6.0__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/PKG-INFO +13 -3
  2. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/README.md +6 -1
  3. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/setup.py +1 -3
  4. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/gbdt.py +9 -4
  5. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/gbdt_classification.py +51 -10
  6. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/gbdt_regression.py +61 -9
  7. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/gpoptimization.py +32 -25
  8. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/__init__.py +7 -1
  9. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/base.py +3 -3
  10. unifiedbooster-0.9.0/unifiedbooster/predictioninterval/__init__.py +3 -0
  11. unifiedbooster-0.9.0/unifiedbooster/predictioninterval/predictioninterval.py +314 -0
  12. unifiedbooster-0.9.0/unifiedbooster/predictionset/__init__.py +3 -0
  13. unifiedbooster-0.9.0/unifiedbooster/predictionset/predictionset.py +113 -0
  14. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/PKG-INFO +13 -3
  15. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/SOURCES.txt +5 -1
  16. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/requires.txt +0 -1
  17. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/LICENSE +0 -0
  18. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/setup.cfg +0 -0
  19. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/__init__.py +0 -0
  20. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/acp.py +0 -0
  21. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/cp.py +0 -0
  22. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/evaluation.py +0 -0
  23. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/icp.py +0 -0
  24. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/nc.py +0 -0
  25. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/util.py +0 -0
  26. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/dependency_links.txt +0 -0
  27. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/entry_points.txt +0 -0
  28. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/not-zip-safe +0 -0
  29. {unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/top_level.txt +0 -0

{unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/PKG-INFO
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: unifiedbooster
- Version: 0.6.0
+ Version: 0.9.0
  Summary: Unified interface for Gradient Boosted Decision Trees
  Home-page: https://github.com/thierrymoudiki/unifiedbooster
  Author: T. Moudiki
@@ -22,8 +22,18 @@ Requires-Dist: numpy
  Requires-Dist: scikit-learn
  Requires-Dist: xgboost
  Requires-Dist: lightgbm
- Requires-Dist: catboost
  Requires-Dist: GPopt
  Requires-Dist: nnetsauce
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: home-page
+ Dynamic: keywords
+ Dynamic: license
+ Dynamic: license-file
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary

  Unified interface for Gradient Boosted Decision Trees

{unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/README.md
@@ -7,6 +7,11 @@ Unified interface for Gradient Boosted Decision Trees algorithms

  ## Examples

+ See also:
+ - Auto XGBoost, Auto LightGBM, Auto CatBoost, Auto GradientBoosting: https://thierrymoudiki.github.io/blog/2024/08/05/python/r/unibooster
+ - Prediction sets and prediction intervals for conformalized Auto XGBoost, Auto LightGBM, Auto CatBoost, Auto GradientBoosting: https://thierrymoudiki.github.io/blog/2024/09/02/python/r/conformalized-unibooster
+ - Notebooks in [/unifiedbooster/demo](/unifiedbooster/demo)
+
  ### classification

  ```python
@@ -90,4 +95,4 @@ for dataset in datasets:
  print(f"Regression Root Mean Squared Error xgboost: {mse1:.2f}")
  print(f"Regression Root Mean Squared Error catboost: {mse2:.2f}")
  print(f"Regression Root Mean Squared Error lightgbm: {mse3:.2f}")
- ```
+ ```

{unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/setup.py
@@ -7,9 +7,7 @@ from setuptools import setup, find_packages
  from codecs import open
  from os import path

- subprocess.check_call(['pip', 'install', 'Cython'])
-
- __version__ = "0.6.0"
+ __version__ = "0.9.0"

  here = path.abspath(path.dirname(__file__))


{unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/gbdt.py
@@ -44,6 +44,8 @@ class GBDT(BaseEstimator):
          max_depth=3,
          rowsample=1.0,
          colsample=1.0,
+         level=None,
+         pi_method=None,
          verbose=0,
          seed=123,
          **kwargs
@@ -55,6 +57,8 @@ class GBDT(BaseEstimator):
          self.max_depth = max_depth
          self.rowsample = rowsample
          self.colsample = colsample
+         self.level = level
+         self.pi_method = pi_method
          self.verbose = verbose
          self.seed = seed

@@ -91,7 +95,7 @@ class GBDT(BaseEstimator):
              "verbose": self.verbose,
              "random_seed": self.seed,
              "boosting_type": "Plain",
-             "leaf_estimation_iterations": 1,
+             "leaf_estimation_iterations": 1,
              "bootstrap_type": "Bernoulli",
              **kwargs,
          }
@@ -126,7 +130,6 @@ class GBDT(BaseEstimator):

          self: object
          """
-
          if getattr(self, "type_fit") == "classification":
              self.classes_ = np.unique(y) # for compatibility with sklearn
              self.n_classes_ = len(
@@ -152,5 +155,7 @@

          model predictions: {array-like}
          """
-
-         return getattr(self, "model").predict(X)
+         if self.level is not None and self.type_fit == "regression":
+             return getattr(self, "model").predict(X, return_pi=True)
+         else:
+             return getattr(self, "model").predict(X)
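
The hunk above changes what `predict` returns for conformalized regressors. A minimal sketch of the resulting contract, assuming `GBDTRegressor` is exported at the package top level as in the README examples:

```python
import unifiedbooster as ub
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

# Without `level`, predict() returns a plain array, as in 0.6.0
reg = ub.GBDTRegressor(model_type="gradientboosting")
point_preds = reg.fit(X_train, y_train).predict(X_test)

# With `level`, the base class forwards return_pi=True to the wrapped model,
# so predict() returns a namedtuple (mean, lower, upper)
reg_pi = ub.GBDTRegressor(model_type="gradientboosting", level=95)
mean, lower, upper = reg_pi.fit(X_train, y_train).predict(X_test)
```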

{unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/gbdt_classification.py
@@ -1,5 +1,6 @@
  from .gbdt import GBDT
  from sklearn.base import ClassifierMixin
+ from .predictionset import PredictionSet

  try:
      from xgboost import XGBClassifier
@@ -40,6 +41,12 @@ class GBDTClassifier(GBDT, ClassifierMixin):
      colsample: float
          percentage of features to use at each node split

+     level: float
+         confidence level for prediction sets
+
+     pi_method: str
+         method for constructing the prediction intervals: 'icp' (inductive conformal), 'tcp' (transductive conformal)
+
      verbose: int
          controls verbosity (default=0)

@@ -97,6 +104,8 @@ class GBDTClassifier(GBDT, ClassifierMixin):
          max_depth=3,
          rowsample=1.0,
          colsample=1.0,
+         level=None,
+         pi_method="icp",
          verbose=0,
          seed=123,
          **kwargs,
@@ -111,21 +120,54 @@ class GBDTClassifier(GBDT, ClassifierMixin):
              max_depth=max_depth,
              rowsample=rowsample,
              colsample=colsample,
+             level=level,
+             pi_method=pi_method,
              verbose=verbose,
              seed=seed,
              **kwargs,
          )

-         if model_type == "xgboost":
-             self.model = XGBClassifier(**self.params)
-         elif model_type == "catboost":
-             self.model = CatBoostClassifier(**self.params)
-         elif model_type == "lightgbm":
-             self.model = LGBMClassifier(**self.params)
-         elif model_type == "gradientboosting":
-             self.model = GradientBoostingClassifier(**self.params)
+         if self.level is not None:
+
+             if model_type in ("xgboost", "xgb"):
+                 self.model = PredictionSet(
+                     XGBClassifier(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                 )
+             elif model_type in ("catboost", "cb"):
+                 self.model = PredictionSet(
+                     CatBoostClassifier(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                 )
+             elif model_type in ("lightgbm", "lgb"):
+                 self.model = PredictionSet(
+                     LGBMClassifier(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                 )
+             elif model_type in ("gradientboosting", "gb"):
+                 self.model = PredictionSet(
+                     GradientBoostingClassifier(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                 )
+             else:
+                 raise ValueError(f"Unknown model_type: {model_type}")
+
          else:
-             raise ValueError(f"Unknown model_type: {model_type}")
+
+             if model_type in ("xgboost", "xgb"):
+                 self.model = XGBClassifier(**self.params)
+             elif model_type in ("catboost", "cb"):
+                 self.model = CatBoostClassifier(**self.params)
+             elif model_type in ("lightgbm", "lgb"):
+                 self.model = LGBMClassifier(**self.params)
+             elif model_type in ("gradientboosting", "gb"):
+                 self.model = GradientBoostingClassifier(**self.params)
+             else:
+                 raise ValueError(f"Unknown model_type: {model_type}")

      def predict_proba(self, X):
          """Predict probabilities for test data X.
@@ -143,5 +185,4 @@ class GBDTClassifier(GBDT, ClassifierMixin):

          probability estimates for test data: {array-like}
          """
-
          return self.model.predict_proba(X)
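
In other words, passing `level` now routes the chosen booster through `PredictionSet` instead of using it directly. A hedged usage sketch; the top-level export and the boolean set encoding (one row per sample, per `nonconformist` conventions) are assumptions:

```python
import unifiedbooster as ub
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

X, y = load_wine(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

# level=95 wraps LGBMClassifier in PredictionSet (inductive conformal by default)
clf = ub.GBDTClassifier(model_type="lightgbm", level=95, pi_method="icp")
clf.fit(X_train, y_train)
sets = clf.predict(X_test)  # flags the labels kept in each 95% prediction set
```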

{unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/gbdt_regression.py
@@ -1,5 +1,6 @@
  from .gbdt import GBDT
  from sklearn.base import RegressorMixin
+ from .predictioninterval import PredictionInterval

  try:
      from xgboost import XGBRegressor
@@ -40,6 +41,16 @@ class GBDTRegressor(GBDT, RegressorMixin):
      colsample: float
          percentage of features to use at each node split

+     level: float
+         confidence level for prediction sets
+
+     pi_method: str
+         method for constructing the prediction intervals: 'splitconformal', 'localconformal'
+
+     type_split: a string;
+         Only if `level` is not `None`
+         "random" (random split of data) or "sequential" (sequential split of data)
+
      verbose: int
          controls verbosity (default=0)

@@ -97,12 +108,16 @@ class GBDTRegressor(GBDT, RegressorMixin):
          max_depth=3,
          rowsample=1.0,
          colsample=1.0,
+         level=None,
+         pi_method="splitconformal",
+         type_split="random",
          verbose=0,
          seed=123,
          **kwargs,
      ):

          self.type_fit = "regression"
+         self.type_split = type_split

          super().__init__(
              model_type=model_type,
@@ -111,18 +126,55 @@ class GBDTRegressor(GBDT, RegressorMixin):
              max_depth=max_depth,
              rowsample=rowsample,
              colsample=colsample,
+             level=level,
+             pi_method=pi_method,
              verbose=verbose,
              seed=seed,
              **kwargs,
          )

-         if model_type == "xgboost":
-             self.model = XGBRegressor(**self.params)
-         elif model_type == "catboost":
-             self.model = CatBoostRegressor(**self.params)
-         elif model_type == "lightgbm":
-             self.model = LGBMRegressor(**self.params)
-         elif model_type == "gradientboosting":
-             self.model = GradientBoostingRegressor(**self.params)
+         if self.level is not None:
+
+             if model_type in ("xgboost", "xgb"):
+                 self.model = PredictionInterval(
+                     XGBRegressor(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                     type_split=self.type_split
+                 )
+             elif model_type in ("catboost", "cb"):
+                 self.model = PredictionInterval(
+                     CatBoostRegressor(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                     type_split=self.type_split
+                 )
+             elif model_type in ("lightgbm", "lgb"):
+                 self.model = PredictionInterval(
+                     LGBMRegressor(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                     type_split=self.type_split
+                 )
+             elif model_type in ("gradientboosting", "gb"):
+                 self.model = PredictionInterval(
+                     GradientBoostingRegressor(**self.params),
+                     level=self.level,
+                     method=self.pi_method,
+                     type_split=self.type_split
+                 )
+             else:
+                 raise ValueError(f"Unknown model_type: {model_type}")
+
          else:
-             raise ValueError(f"Unknown model_type: {model_type}")
+
+             if model_type in ("xgboost", "xgb"):
+                 self.model = XGBRegressor(**self.params)
+             elif model_type in ("catboost", "cb"):
+                 self.model = CatBoostRegressor(**self.params)
+             elif model_type in ("lightgbm", "lgb"):
+                 self.model = LGBMRegressor(**self.params)
+             elif model_type in ("gradientboosting", "gb"):
+                 self.model = GradientBoostingRegressor(**self.params)
+             else:
+                 raise ValueError(f"Unknown model_type: {model_type}")
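
Besides the conformal wrapper, this hunk also introduces the short aliases `xgb`, `cb`, `lgb` and `gb` for `model_type`. A sketch of the new options (top-level export assumed):

```python
import unifiedbooster as ub
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
# shuffle=False keeps the order, to pair with type_split="sequential" below
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False)

reg = ub.GBDTRegressor(
    model_type="xgb",            # new alias for "xgboost"
    level=95,                    # 95% prediction intervals
    pi_method="splitconformal",  # or "localconformal"
    type_split="sequential",     # split fit/calibration data in order
)
res = reg.fit(X_train, y_train).predict(X_test)
print(res.lower[:5], res.upper[:5])
```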

{unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/gpoptimization.py
@@ -168,7 +168,8 @@ def cross_val_optim(
      ).mean()

      # objective function for hyperparams tuning
-     if n_estimators is not None:
+     if n_estimators is not None:
+
          def crossval_objective(xx):
              return gbdt_cv(
                  X_train=X_train,
@@ -185,25 +186,27 @@ def cross_val_optim(
                  scoring=scoring,
                  seed=seed,
              )
-     else: # n_estimators is None
+
+     else: # n_estimators is None
+
          def crossval_objective(xx):
              return gbdt_cv(
-             X_train=X_train,
-             y_train=y_train,
-             model_type=model_type,
-             n_estimators=int(10 ** xx[4]),
-             learning_rate=10 ** xx[0],
-             max_depth=int(xx[1]),
-             rowsample=xx[2],
-             colsample=xx[3],
-             cv=cv,
-             n_jobs=n_jobs,
-             type_fit=type_fit,
-             scoring=scoring,
-             seed=seed,
-             )
+                 X_train=X_train,
+                 y_train=y_train,
+                 model_type=model_type,
+                 n_estimators=int(10 ** xx[4]),
+                 learning_rate=10 ** xx[0],
+                 max_depth=int(xx[1]),
+                 rowsample=xx[2],
+                 colsample=xx[3],
+                 cv=cv,
+                 n_jobs=n_jobs,
+                 type_fit=type_fit,
+                 scoring=scoring,
+                 seed=seed,
+             )

-     if n_estimators is not None:
+     if n_estimators is not None:
          if surrogate_obj is None:
              gp_opt = gp.GPOpt(
                  objective_func=crossval_objective,
@@ -240,7 +243,7 @@ def cross_val_optim(
                  n_iter=n_iter,
                  seed=seed,
              )
-     else: # n_estimators is None
+     else: # n_estimators is None
          if surrogate_obj is None:
              gp_opt = gp.GPOpt(
                  objective_func=crossval_objective,
@@ -251,7 +254,7 @@ def cross_val_optim(
                      "max_depth",
                      "rowsample",
                      "colsample",
-                     "n_estimators"
+                     "n_estimators",
                  ],
                  method="bayesian",
                  n_init=n_init,
@@ -268,7 +271,7 @@ def cross_val_optim(
                      "max_depth",
                      "rowsample",
                      "colsample",
-                     "n_estimators"
+                     "n_estimators",
                  ],
                  acquisition="ucb",
                  method="splitconformal",
@@ -282,7 +285,11 @@ def cross_val_optim(

      res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
      res.best_params["model_type"] = model_type
-     res.best_params["n_estimators"] = int(n_estimators) if n_estimators is not None else int(10 ** res.best_params["n_estimators"])
+     res.best_params["n_estimators"] = (
+         int(n_estimators)
+         if n_estimators is not None
+         else int(10 ** res.best_params["n_estimators"])
+     )
      res.best_params["learning_rate"] = 10 ** res.best_params["learning_rate"]
      res.best_params["max_depth"] = int(res.best_params["max_depth"])
      res.best_params["rowsample"] = res.best_params["rowsample"]
@@ -355,7 +362,7 @@ def lazy_cross_val_optim(

      customize: boolean
          if True, the surrogate is transformed into a quasi-randomized network (default is False)
-
+
      n_estimators: int
          maximum number of trees that can be built (default is None, if None, the parameters is tuned)

@@ -383,7 +390,7 @@ def lazy_cross_val_optim(
      Examples:

      ```python
-     import os
+     import os
      import unifiedbooster as ub
      from sklearn.datasets import load_breast_cancer
      from sklearn.model_selection import train_test_split
@@ -454,7 +461,7 @@ def lazy_cross_val_optim(
          if customize == True:
              print(f"\n surrogate: CustomRegressor({est[0]})")
              surr_obj = ns.CustomRegressor(obj=est[1]())
-         else:
+         else:
              print(f"\n surrogate: {est[0]}")
              surr_obj = est[1]()
          res = cross_val_optim(
@@ -479,7 +486,7 @@ def lazy_cross_val_optim(
          if customize == True:
              results.append((f"CustomRegressor({est[0]})", res))
          else:
-             results.append((est[0], res))
+             results.append((est[0], res))
      except:
          pass

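The hunks above are mostly black-style reformatting, but they also confirm `cross_val_optim`'s tuning behavior: when `n_estimators` is `None`, it joins the search space on a log scale (`10 ** xx[4]`) and is back-transformed in `best_params`. A hypothetical call, using only parameter names visible in the hunks; the `ub.cross_val_optim` export path is an assumption:

```python
import unifiedbooster as ub
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, _, y_train, _ = train_test_split(X, y, random_state=123)

res = ub.cross_val_optim(
    X_train=X_train,
    y_train=y_train,
    model_type="xgboost",
    n_estimators=None,  # None -> tuned as 10**xx[4], then back-transformed
    cv=5,
    type_fit="classification",
    scoring="accuracy",
    seed=123,
)
print(res.best_params)  # learning_rate and n_estimators already back-transformed
```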

{unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/__init__.py
@@ -18,13 +18,19 @@ from .nc import (
  )
  from .cp import IcpRegressor, TcpClassifier
  from .icp import IcpClassifier
- from .base import RegressorAdapter
+ from .nc import ClassifierNc, MarginErrFunc
+ from .base import RegressorAdapter, ClassifierAdapter

  __all__ = [
      "AbsErrorErrFunc",
+     "MarginErrFunc",
      "QuantileRegErrFunc",
      "RegressorAdapter",
+     "ClassifierAdapter",
      "RegressorNc",
+     "ClassifierNc",
      "RegressorNormalizer",
      "IcpRegressor",
+     "IcpClassifier",
+     "TcpClassifier",
  ]

{unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster/nonconformist/base.py
@@ -9,7 +9,7 @@ docstring
  import abc
  import numpy as np

- from sklearn.base import BaseEstimator
+ from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin


  class RegressorMixin(object):
@@ -102,7 +102,7 @@ class BaseModelAdapter(BaseEstimator):
          pass


- class ClassifierAdapter(BaseModelAdapter):
+ class ClassifierAdapter(BaseModelAdapter, ClassifierMixin):
      def __init__(self, model, fit_params=None):
          super(ClassifierAdapter, self).__init__(model, fit_params)

@@ -110,7 +110,7 @@ class ClassifierAdapter(BaseModelAdapter):
          return self.model.predict_proba(x)


- class RegressorAdapter(BaseModelAdapter):
+ class RegressorAdapter(BaseModelAdapter, RegressorMixin):
      def __init__(self, model, fit_params=None):
          super(RegressorAdapter, self).__init__(model, fit_params)

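These adapter changes serve the new classification path: `PredictionSet` (further down in this diff) builds its conformal predictor from exactly these pieces. A minimal sketch of the pattern, lifted from `predictionset.py` below, with `LogisticRegression` as a stand-in classifier:

```python
from sklearn.linear_model import LogisticRegression
from unifiedbooster.nonconformist import (
    ClassifierAdapter,
    ClassifierNc,
    IcpClassifier,
    MarginErrFunc,
)

# Wrap any sklearn-style classifier, attach a margin-based nonconformity
# score, and build an inductive conformal classifier from it
nc = ClassifierNc(ClassifierAdapter(LogisticRegression()), MarginErrFunc())
icp = IcpClassifier(nc)
```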

unifiedbooster-0.9.0/unifiedbooster/predictioninterval/__init__.py
@@ -0,0 +1,3 @@
+ from .predictioninterval import PredictionInterval
+
+ __all__ = ["PredictionInterval"]

unifiedbooster-0.9.0/unifiedbooster/predictioninterval/predictioninterval.py
@@ -0,0 +1,314 @@
+ from locale import normalize
+ import numpy as np
+ import pickle
+ from collections import namedtuple
+ from sklearn.base import BaseEstimator, RegressorMixin
+ from sklearn.model_selection import train_test_split
+ from sklearn.ensemble import ExtraTreesRegressor
+ from sklearn.preprocessing import StandardScaler
+ from sklearn.neighbors import KernelDensity
+ from sklearn.model_selection import GridSearchCV
+ from scipy.stats import gaussian_kde
+ from tqdm import tqdm
+ from ..nonconformist import IcpRegressor
+ from ..nonconformist import RegressorNc
+ from ..nonconformist import RegressorNormalizer, AbsErrorErrFunc
+
+
+ class PredictionInterval(BaseEstimator, RegressorMixin):
+     """Class PredictionInterval: Obtain prediction intervals.
+
+     Attributes:
+
+         obj: an object;
+             fitted object containing methods `fit` and `predict`
+
+         method: a string;
+             method for constructing the prediction intervals.
+             Currently "splitconformal" (default) and "localconformal"
+
+         level: a float;
+             Confidence level for prediction intervals. Default is 95,
+             equivalent to a miscoverage error of 5 (%)
+
+         replications: an integer;
+             Number of replications for simulated conformal (default is `None`)
+
+         type_pi: a string;
+             type of prediction interval: currently "kde" (default) or "bootstrap"
+
+         type_split: a string;
+             "random" (random split of data) or "sequential" (sequential split of data)
+
+         seed: an integer;
+             Reproducibility of fit (there's a random split between fitting and calibration data)
+     """
+
+     def __init__(
+         self,
+         obj,
+         method="splitconformal",
+         level=95,
+         type_pi="bootstrap",
+         type_split="random",
+         replications=None,
+         kernel=None,
+         agg="mean",
+         seed=123,
+     ):
+
+         self.obj = obj
+         self.method = method
+         self.level = level
+         self.type_pi = type_pi
+         self.type_split = type_split
+         self.replications = replications
+         self.kernel = kernel
+         self.agg = agg
+         self.seed = seed
+         self.alpha_ = 1 - self.level / 100
+         self.quantile_ = None
+         self.icp_ = None
+         self.calibrated_residuals_ = None
+         self.scaled_calibrated_residuals_ = None
+         self.calibrated_residuals_scaler_ = None
+         self.kde_ = None
+
+     def fit(self, X, y):
+         """Fit the `method` to training data (X, y).
+
+         Args:
+
+             X: array-like, shape = [n_samples, n_features];
+                 Training set vectors, where n_samples is the number
+                 of samples and n_features is the number of features.
+
+             y: array-like, shape = [n_samples, ]; Target values.
+
+         """
+
+         if self.type_split == "random":
+             X_train, X_calibration, y_train, y_calibration = train_test_split(
+                 X, y, test_size=0.5, random_state=self.seed
+             )
+         elif self.type_split == "sequential":
+             n_x = X.shape[0]
+             n_x_half = n_x // 2
+             first_half_idx = range(0, n_x_half)
+             second_half_idx = range(n_x_half, n_x)
+             X_train = X[first_half_idx, :]
+             X_calibration = X[second_half_idx, :]
+             y_train = y[first_half_idx]
+             y_calibration = y[second_half_idx]
+
+         if self.method == "splitconformal":
+
+             n_samples_calibration = X_calibration.shape[0]
+             self.obj.fit(X_train, y_train)
+             preds_calibration = self.obj.predict(X_calibration)
+             self.calibrated_residuals_ = y_calibration - preds_calibration
+             absolute_residuals = np.abs(self.calibrated_residuals_)
+             self.calibrated_residuals_scaler_ = StandardScaler(
+                 with_mean=True, with_std=True
+             )
+             self.scaled_calibrated_residuals_ = (
+                 self.calibrated_residuals_scaler_.fit_transform(
+                     self.calibrated_residuals_.reshape(-1, 1)
+                 ).ravel()
+             )
+             try:
+                 # numpy version >= 1.22
+                 self.quantile_ = np.quantile(
+                     a=absolute_residuals, q=self.level / 100, method="higher"
+                 )
+             except:
+                 # numpy version < 1.22
+                 self.quantile_ = np.quantile(
+                     a=absolute_residuals,
+                     q=self.level / 100,
+                     interpolation="higher",
+                 )
+
+         if self.method == "localconformal":
+
+             mad_estimator = ExtraTreesRegressor()
+             normalizer = RegressorNormalizer(
+                 self.obj, mad_estimator, AbsErrorErrFunc()
+             )
+             nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
+             self.icp_ = IcpRegressor(nc)
+             self.icp_.fit(X_train, y_train)
+             self.icp_.calibrate(X_calibration, y_calibration)
+
+         return self
+
+     def predict(self, X, return_pi=False):
+         """Obtain predictions and prediction intervals
+
+         Args:
+
+             X: array-like, shape = [n_samples, n_features];
+                 Testing set vectors, where n_samples is the number
+                 of samples and n_features is the number of features.
+
+             return_pi: boolean
+                 Whether the prediction interval is returned or not.
+                 Default is False, for compatibility with other _estimators_.
+                 If True, a tuple containing the predictions + lower and upper
+                 bounds is returned.
+
+         """
+
+         pred = self.obj.predict(X)
+
+         if self.method == "splitconformal":
+
+             if self.replications is None:
+
+                 if return_pi:
+
+                     DescribeResult = namedtuple(
+                         "DescribeResult", ("mean", "lower", "upper")
+                     )
+
+                     return DescribeResult(
+                         pred, pred - self.quantile_, pred + self.quantile_
+                     )
+
+                 else:
+
+                     return pred
+
+             else: # if self.replications is not None
+
+                 assert self.type_pi in (
+                     "bootstrap",
+                     "kde",
+                 ), "`self.type_pi` must be in ('bootstrap', 'kde')"
+
+                 if self.type_pi == "bootstrap":
+                     np.random.seed(self.seed)
+                     self.residuals_sims_ = np.asarray(
+                         [
+                             np.random.choice(
+                                 a=self.scaled_calibrated_residuals_,
+                                 size=X.shape[0],
+                             )
+                             for _ in range(self.replications)
+                         ]
+                     ).T
+                     self.sims_ = np.asarray(
+                         [
+                             pred
+                             + self.calibrated_residuals_scaler_.scale_[0]
+                             * self.residuals_sims_[:, i].ravel()
+                             for i in range(self.replications)
+                         ]
+                     ).T
+                 elif self.type_pi == "kde":
+                     self.kde_ = gaussian_kde(
+                         dataset=self.scaled_calibrated_residuals_
+                     )
+                     self.sims_ = np.asarray(
+                         [
+                             pred
+                             + self.calibrated_residuals_scaler_.scale_[0]
+                             * self.kde_.resample(
+                                 size=X.shape[0], seed=self.seed + i
+                             ).ravel()
+                             for i in range(self.replications)
+                         ]
+                     ).T
+
+                 self.mean_ = np.mean(self.sims_, axis=1)
+                 self.lower_ = np.quantile(
+                     self.sims_, q=self.alpha_ / 200, axis=1
+                 )
+                 self.upper_ = np.quantile(
+                     self.sims_, q=1 - self.alpha_ / 200, axis=1
+                 )
+
+                 DescribeResult = namedtuple(
+                     "DescribeResult", ("mean", "sims", "lower", "upper")
+                 )
+
+                 return DescribeResult(
+                     self.mean_, self.sims_, self.lower_, self.upper_
+                 )
+
+         if self.method == "localconformal":
+
+             if self.replications is None:
+
+                 if return_pi:
+
+                     predictions_bounds = self.icp_.predict(
+                         X, significance=1 - self.level
+                     )
+                     DescribeResult = namedtuple(
+                         "DescribeResult", ("mean", "lower", "upper")
+                     )
+                     return DescribeResult(
+                         pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
+                     )
+
+                 else:
+
+                     return pred
+
+             else: # if self.replications is not None
+
+                 assert self.type_pi in (
+                     "bootstrap",
+                     "kde",
+                 ), "`self.type_pi` must be in ('bootstrap', 'kde')"
+
+                 if self.type_pi == "bootstrap":
+                     np.random.seed(self.seed)
+                     self.residuals_sims_ = np.asarray(
+                         [
+                             np.random.choice(
+                                 a=self.scaled_calibrated_residuals_,
+                                 size=X.shape[0],
+                             )
+                             for _ in range(self.replications)
+                         ]
+                     ).T
+                     self.sims_ = np.asarray(
+                         [
+                             pred
+                             + self.calibrated_residuals_scaler_.scale_[0]
+                             * self.residuals_sims_[:, i].ravel()
+                             for i in tqdm(range(self.replications))
+                         ]
+                     ).T
+                 elif self.type_pi == "kde":
+                     self.kde_ = gaussian_kde(
+                         dataset=self.scaled_calibrated_residuals_
+                     )
+                     self.sims_ = np.asarray(
+                         [
+                             pred
+                             + self.calibrated_residuals_scaler_.scale_[0]
+                             * self.kde_.resample(
+                                 size=X.shape[0], seed=self.seed + i
+                             ).ravel()
+                             for i in tqdm(range(self.replications))
+                         ]
+                     ).T
+
+                 self.mean_ = np.mean(self.sims_, axis=1)
+                 self.lower_ = np.quantile(
+                     self.sims_, q=self.alpha_ / 200, axis=1
+                 )
+                 self.upper_ = np.quantile(
+                     self.sims_, q=1 - self.alpha_ / 200, axis=1
+                 )
+
+                 DescribeResult = namedtuple(
+                     "DescribeResult", ("mean", "sims", "lower", "upper")
+                 )
+
+                 return DescribeResult(
+                     self.mean_, self.sims_, self.lower_, self.upper_
+                 )
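
A usage sketch for the new class, following its own docstring and `fit`/`predict` code above (the import path mirrors the SOURCES.txt entries further down):

```python
from sklearn.datasets import load_diabetes
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from unifiedbooster.predictioninterval import PredictionInterval

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

pi = PredictionInterval(
    obj=GradientBoostingRegressor(),
    method="splitconformal",
    level=95,
)
pi.fit(X_train, y_train)  # fits on one half of the data, calibrates on the other
mean, lower, upper = pi.predict(X_test, return_pi=True)
```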

unifiedbooster-0.9.0/unifiedbooster/predictionset/__init__.py
@@ -0,0 +1,3 @@
+ from .predictionset import PredictionSet
+
+ __all__ = ["PredictionSet"]

unifiedbooster-0.9.0/unifiedbooster/predictionset/predictionset.py
@@ -0,0 +1,113 @@
+ from locale import normalize
+ import numpy as np
+ import pickle
+ from collections import namedtuple
+ from sklearn.base import BaseEstimator, ClassifierMixin
+ from sklearn.model_selection import train_test_split
+ from sklearn.ensemble import ExtraTreesRegressor
+ from sklearn.preprocessing import StandardScaler
+ from scipy.stats import gaussian_kde
+ from tqdm import tqdm
+ from ..nonconformist import (
+     ClassifierAdapter,
+     IcpClassifier,
+     TcpClassifier,
+     ClassifierNc,
+     MarginErrFunc,
+ )
+
+
+ class PredictionSet(BaseEstimator, ClassifierMixin):
+     """Class PredictionSet: Obtain prediction sets.
+
+     Attributes:
+
+         obj: an object;
+             fitted object containing methods `fit` and `predict`
+
+         method: a string;
+             method for constructing the prediction sets.
+             Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)
+
+         level: a float;
+             Confidence level for prediction sets. Default is None,
+             95 is equivalent to a miscoverage error of 5 (%)
+
+         seed: an integer;
+             Reproducibility of fit (there's a random split between fitting and calibration data)
+     """
+
+     def __init__(
+         self,
+         obj,
+         method="icp",
+         level=None,
+         seed=123,
+     ):
+
+         self.obj = obj
+         self.method = method
+         self.level = level
+         self.seed = seed
+         if self.level is not None:
+             self.alpha_ = 1 - self.level / 100
+         self.quantile_ = None
+         self.icp_ = None
+         self.tcp_ = None
+
+         if self.method == "icp":
+             self.icp_ = IcpClassifier(
+                 ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
+             )
+         elif self.method == "tcp":
+             self.tcp_ = TcpClassifier(
+                 ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
+             )
+         else:
+             raise ValueError("`self.method` must be in ('icp', 'tcp')")
+
+     def fit(self, X, y):
+         """Fit the `method` to training data (X, y).
+
+         Args:
+
+             X: array-like, shape = [n_samples, n_features];
+                 Training set vectors, where n_samples is the number
+                 of samples and n_features is the number of features.
+
+             y: array-like, shape = [n_samples, ]; Target values.
+
+         """
+         if self.method == "icp":
+
+             X_train, X_calibration, y_train, y_calibration = train_test_split(
+                 X, y, test_size=0.5, random_state=self.seed
+             )
+             self.icp_.fit(X_train, y_train)
+             self.icp_.calibrate(X_calibration, y_calibration)
+
+         elif self.method == "tcp":
+
+             self.tcp_.fit(X, y)
+
+         return self
+
+     def predict(self, X):
+         """Obtain predictions and prediction sets
+
+         Args:
+
+             X: array-like, shape = [n_samples, n_features];
+                 Testing set vectors, where n_samples is the number
+                 of samples and n_features is the number of features.
+
+         """
+
+         if self.method == "icp":
+             return self.icp_.predict(X, significance=self.alpha_)
+
+         elif self.method == "tcp":
+             return self.tcp_.predict(X, significance=self.alpha_)
+
+         else:
+             raise ValueError("`self.method` must be in ('icp', 'tcp')")
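
And the matching sketch for prediction sets; per `nonconformist` conventions, `predict` should return one row per sample flagging the labels retained at the requested confidence level (an assumption about the encoding, not stated in this diff):

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from unifiedbooster.predictionset import PredictionSet

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

ps = PredictionSet(GradientBoostingClassifier(), method="icp", level=95)
ps.fit(X_train, y_train)  # icp: half the data for fitting, half for calibration
sets = ps.predict(X_test)
print(np.mean(np.sum(sets, axis=1)))  # average prediction-set size
```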

{unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: unifiedbooster
- Version: 0.6.0
+ Version: 0.9.0
  Summary: Unified interface for Gradient Boosted Decision Trees
  Home-page: https://github.com/thierrymoudiki/unifiedbooster
  Author: T. Moudiki
@@ -22,8 +22,18 @@ Requires-Dist: numpy
  Requires-Dist: scikit-learn
  Requires-Dist: xgboost
  Requires-Dist: lightgbm
- Requires-Dist: catboost
  Requires-Dist: GPopt
  Requires-Dist: nnetsauce
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: home-page
+ Dynamic: keywords
+ Dynamic: license
+ Dynamic: license-file
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary

  Unified interface for Gradient Boosted Decision Trees

{unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/SOURCES.txt
@@ -20,4 +20,8 @@ unifiedbooster/nonconformist/cp.py
  unifiedbooster/nonconformist/evaluation.py
  unifiedbooster/nonconformist/icp.py
  unifiedbooster/nonconformist/nc.py
- unifiedbooster/nonconformist/util.py
+ unifiedbooster/nonconformist/util.py
+ unifiedbooster/predictioninterval/__init__.py
+ unifiedbooster/predictioninterval/predictioninterval.py
+ unifiedbooster/predictionset/__init__.py
+ unifiedbooster/predictionset/predictionset.py

{unifiedbooster-0.6.0 → unifiedbooster-0.9.0}/unifiedbooster.egg-info/requires.txt
@@ -3,6 +3,5 @@ numpy
  scikit-learn
  xgboost
  lightgbm
- catboost
  GPopt
  nnetsauce