PyPI - unifiedbooster - Versions diffs - 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl - Mend

unifiedbooster 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

unifiedbooster/gbdt.py +9 -5
unifiedbooster/gbdt_classification.py +43 -11
unifiedbooster/gbdt_regression.py +45 -12
unifiedbooster/gpoptimization.py +126 -61
unifiedbooster/nonconformist/__init__.py +36 -0
unifiedbooster/nonconformist/acp.py +381 -0
unifiedbooster/nonconformist/base.py +156 -0
unifiedbooster/nonconformist/cp.py +172 -0
unifiedbooster/nonconformist/evaluation.py +486 -0
unifiedbooster/nonconformist/icp.py +442 -0
unifiedbooster/nonconformist/nc.py +610 -0
unifiedbooster/nonconformist/util.py +9 -0
unifiedbooster/predictioninterval/__init__.py +3 -0
unifiedbooster/predictioninterval/predictioninterval.py +314 -0
unifiedbooster/predictionset/__init__.py +3 -0
unifiedbooster/predictionset/predictionset.py +111 -0
{unifiedbooster-0.5.0.dist-info → unifiedbooster-0.7.0.dist-info}/METADATA +1 -1
unifiedbooster-0.7.0.dist-info/RECORD +23 -0
{unifiedbooster-0.5.0.dist-info → unifiedbooster-0.7.0.dist-info}/WHEEL +1 -1
unifiedbooster-0.5.0.dist-info/RECORD +0 -11
{unifiedbooster-0.5.0.dist-info → unifiedbooster-0.7.0.dist-info}/LICENSE +0 -0
{unifiedbooster-0.5.0.dist-info → unifiedbooster-0.7.0.dist-info}/entry_points.txt +0 -0
{unifiedbooster-0.5.0.dist-info → unifiedbooster-0.7.0.dist-info}/top_level.txt +0 -0

unifiedbooster/predictioninterval/predictioninterval.py ADDED Viewed

@@ -0,0 +1,314 @@
+from locale import normalize
+import numpy as np
+import pickle
+from collections import namedtuple
+from sklearn.base import BaseEstimator, RegressorMixin
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import ExtraTreesRegressor
+from sklearn.preprocessing import StandardScaler
+from sklearn.neighbors import KernelDensity
+from sklearn.model_selection import GridSearchCV
+from scipy.stats import gaussian_kde
+from tqdm import tqdm
+from ..nonconformist import IcpRegressor
+from ..nonconformist import RegressorNc
+from ..nonconformist import RegressorNormalizer, AbsErrorErrFunc
+class PredictionInterval(BaseEstimator, RegressorMixin):
+    """Class PredictionInterval: Obtain prediction intervals.
+    Attributes:
+        obj: an object;
+            fitted object containing methods `fit` and `predict`
+        method: a string;
+            method for constructing the prediction intervals.
+            Currently "splitconformal" (default) and "localconformal"
+        level: a float;
+            Confidence level for prediction intervals. Default is 95,
+            equivalent to a miscoverage error of 5 (%)
+        replications: an integer;
+            Number of replications for simulated conformal (default is `None`)
+        type_pi: a string;
+            type of prediction interval: currently "kde" (default) or "bootstrap"
+        type_split: a string;
+            "random" (random split of data) or "sequential" (sequential split of data)
+        seed: an integer;
+            Reproducibility of fit (there's a random split between fitting and calibration data)
+    """
+    def __init__(
+        self,
+        obj,
+        method="splitconformal",
+        level=95,
+        type_pi="bootstrap",
+        type_split="random",
+        replications=None,
+        kernel=None,
+        agg="mean",
+        seed=123,
+    ):
+        self.obj = obj
+        self.method = method
+        self.level = level
+        self.type_pi = type_pi
+        self.type_split = type_split
+        self.replications = replications
+        self.kernel = kernel
+        self.agg = agg
+        self.seed = seed
+        self.alpha_ = 1 - self.level / 100
+        self.quantile_ = None
+        self.icp_ = None
+        self.calibrated_residuals_ = None
+        self.scaled_calibrated_residuals_ = None
+        self.calibrated_residuals_scaler_ = None
+        self.kde_ = None
+    def fit(self, X, y):
+        """Fit the `method` to training data (X, y).
+        Args:
+            X: array-like, shape = [n_samples, n_features];
+                Training set vectors, where n_samples is the number
+                of samples and n_features is the number of features.
+            y: array-like, shape = [n_samples, ]; Target values.
+        """
+        if self.type_split == "random":
+            X_train, X_calibration, y_train, y_calibration = train_test_split(
+                X, y, test_size=0.5, random_state=self.seed
+            )
+        elif self.type_split == "sequential":
+            n_x = X.shape[0]
+            n_x_half = n_x // 2
+            first_half_idx = range(0, n_x_half)
+            second_half_idx = range(n_x_half, n_x)
+            X_train = X[first_half_idx, :]
+            X_calibration = X[second_half_idx, :]
+            y_train = y[first_half_idx]
+            y_calibration = y[second_half_idx]
+        if self.method == "splitconformal":
+            n_samples_calibration = X_calibration.shape[0]
+            self.obj.fit(X_train, y_train)
+            preds_calibration = self.obj.predict(X_calibration)
+            self.calibrated_residuals_ = y_calibration - preds_calibration
+            absolute_residuals = np.abs(self.calibrated_residuals_)
+            self.calibrated_residuals_scaler_ = StandardScaler(
+                with_mean=True, with_std=True
+            )
+            self.scaled_calibrated_residuals_ = (
+                self.calibrated_residuals_scaler_.fit_transform(
+                    self.calibrated_residuals_.reshape(-1, 1)
+                ).ravel()
+            )
+            try:
+                # numpy version >= 1.22
+                self.quantile_ = np.quantile(
+                    a=absolute_residuals, q=self.level / 100, method="higher"
+                )
+            except:
+                # numpy version < 1.22
+                self.quantile_ = np.quantile(
+                    a=absolute_residuals,
+                    q=self.level / 100,
+                    interpolation="higher",
+                )
+        if self.method == "localconformal":
+            mad_estimator = ExtraTreesRegressor()
+            normalizer = RegressorNormalizer(
+                self.obj, mad_estimator, AbsErrorErrFunc()
+            )
+            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
+            self.icp_ = IcpRegressor(nc)
+            self.icp_.fit(X_train, y_train)
+            self.icp_.calibrate(X_calibration, y_calibration)
+        return self
+    def predict(self, X, return_pi=False):
+        """Obtain predictions and prediction intervals
+        Args:
+            X: array-like, shape = [n_samples, n_features];
+                Testing set vectors, where n_samples is the number
+                of samples and n_features is the number of features.
+            return_pi: boolean
+                Whether the prediction interval is returned or not.
+                Default is False, for compatibility with other _estimators_.
+                If True, a tuple containing the predictions + lower and upper
+                bounds is returned.
+        """
+        pred = self.obj.predict(X)
+        if self.method == "splitconformal":
+            if self.replications is None:
+                if return_pi:
+                    DescribeResult = namedtuple(
+                        "DescribeResult", ("mean", "lower", "upper")
+                    )
+                    return DescribeResult(
+                        pred, pred - self.quantile_, pred + self.quantile_
+                    )
+                else:
+                    return pred
+            else:  # if self.replications is not None
+                assert self.type_pi in (
+                    "bootstrap",
+                    "kde",
+                ), "`self.type_pi` must be in ('bootstrap', 'kde')"
+                if self.type_pi == "bootstrap":
+                    np.random.seed(self.seed)
+                    self.residuals_sims_ = np.asarray(
+                        [
+                            np.random.choice(
+                                a=self.scaled_calibrated_residuals_,
+                                size=X.shape[0],
+                            )
+                            for _ in range(self.replications)
+                        ]
+                    ).T
+                    self.sims_ = np.asarray(
+                        [
+                            pred
+                            + self.calibrated_residuals_scaler_.scale_[0]
+                            * self.residuals_sims_[:, i].ravel()
+                            for i in range(self.replications)
+                        ]
+                    ).T
+                elif self.type_pi == "kde":
+                    self.kde_ = gaussian_kde(
+                        dataset=self.scaled_calibrated_residuals_
+                    )
+                    self.sims_ = np.asarray(
+                        [
+                            pred
+                            + self.calibrated_residuals_scaler_.scale_[0]
+                            * self.kde_.resample(
+                                size=X.shape[0], seed=self.seed + i
+                            ).ravel()
+                            for i in range(self.replications)
+                        ]
+                    ).T
+                self.mean_ = np.mean(self.sims_, axis=1)
+                self.lower_ = np.quantile(
+                    self.sims_, q=self.alpha_ / 200, axis=1
+                )
+                self.upper_ = np.quantile(
+                    self.sims_, q=1 - self.alpha_ / 200, axis=1
+                )
+                DescribeResult = namedtuple(
+                    "DescribeResult", ("mean", "sims", "lower", "upper")
+                )
+                return DescribeResult(
+                    self.mean_, self.sims_, self.lower_, self.upper_
+                )
+        if self.method == "localconformal":
+            if self.replications is None:
+                if return_pi:
+                    predictions_bounds = self.icp_.predict(
+                        X, significance=1 - self.level
+                    )
+                    DescribeResult = namedtuple(
+                        "DescribeResult", ("mean", "lower", "upper")
+                    )
+                    return DescribeResult(
+                        pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
+                    )
+                else:
+                    return pred
+            else:  # if self.replications is not None
+                assert self.type_pi in (
+                    "bootstrap",
+                    "kde",
+                ), "`self.type_pi` must be in ('bootstrap', 'kde')"
+                if self.type_pi == "bootstrap":
+                    np.random.seed(self.seed)
+                    self.residuals_sims_ = np.asarray(
+                        [
+                            np.random.choice(
+                                a=self.scaled_calibrated_residuals_,
+                                size=X.shape[0],
+                            )
+                            for _ in range(self.replications)
+                        ]
+                    ).T
+                    self.sims_ = np.asarray(
+                        [
+                            pred
+                            + self.calibrated_residuals_scaler_.scale_[0]
+                            * self.residuals_sims_[:, i].ravel()
+                            for i in tqdm(range(self.replications))
+                        ]
+                    ).T
+                elif self.type_pi == "kde":
+                    self.kde_ = gaussian_kde(
+                        dataset=self.scaled_calibrated_residuals_
+                    )
+                    self.sims_ = np.asarray(
+                        [
+                            pred
+                            + self.calibrated_residuals_scaler_.scale_[0]
+                            * self.kde_.resample(
+                                size=X.shape[0], seed=self.seed + i
+                            ).ravel()
+                            for i in tqdm(range(self.replications))
+                        ]
+                    ).T
+                self.mean_ = np.mean(self.sims_, axis=1)
+                self.lower_ = np.quantile(
+                    self.sims_, q=self.alpha_ / 200, axis=1
+                )
+                self.upper_ = np.quantile(
+                    self.sims_, q=1 - self.alpha_ / 200, axis=1
+                )
+                DescribeResult = namedtuple(
+                    "DescribeResult", ("mean", "sims", "lower", "upper")
+                )
+                return DescribeResult(
+                    self.mean_, self.sims_, self.lower_, self.upper_
+                )

unifiedbooster/predictionset/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .predictionset import PredictionSet
+__all__ = ["PredictionSet"]

unifiedbooster/predictionset/predictionset.py ADDED Viewed

@@ -0,0 +1,111 @@
+from locale import normalize
+import numpy as np
+import pickle
+from collections import namedtuple
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import ExtraTreesRegressor
+from sklearn.preprocessing import StandardScaler
+from scipy.stats import gaussian_kde
+from tqdm import tqdm
+from ..nonconformist import ClassifierAdapter, IcpClassifier, TcpClassifier, ClassifierNc, MarginErrFunc
+class PredictionSet(BaseEstimator, ClassifierMixin):
+    """Class PredictionSet: Obtain prediction sets.
+    Attributes:
+        obj: an object;
+            fitted object containing methods `fit` and `predict`
+        method: a string;
+            method for constructing the prediction sets.
+            Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)
+        level: a float;
+            Confidence level for prediction sets. Default is None,
+            95 is equivalent to a miscoverage error of 5 (%)
+        seed: an integer;
+            Reproducibility of fit (there's a random split between fitting and calibration data)
+    """
+    def __init__(
+        self,
+        obj,
+        method="icp",
+        level=None,
+        seed=123,
+    ):
+        self.obj = obj
+        self.method = method
+        self.level = level
+        self.seed = seed
+        if self.level is not None:
+            self.alpha_ = 1 - self.level / 100
+        self.quantile_ = None
+        self.icp_ = None
+        self.tcp_ = None
+        if self.method == "icp":
+            self.icp_ = IcpClassifier(
+                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
+            )
+        elif self.method == "tcp":
+            self.tcp_ = TcpClassifier(
+                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
+            )
+        else:
+            raise ValueError(
+                "`self.method` must be in ('icp', 'tcp')"
+            )
+    def fit(self, X, y):
+        """Fit the `method` to training data (X, y).
+        Args:
+            X: array-like, shape = [n_samples, n_features];
+                Training set vectors, where n_samples is the number
+                of samples and n_features is the number of features.
+            y: array-like, shape = [n_samples, ]; Target values.
+        """
+        if self.method == "icp":
+            X_train, X_calibration, y_train, y_calibration = train_test_split(
+            X, y, test_size=0.5, random_state=self.seed)
+            self.icp_.fit(X_train, y_train)
+            self.icp_.calibrate(X_calibration, y_calibration)
+        elif self.method == "tcp":
+            self.tcp_.fit(X, y)
+        return self
+    def predict(self, X):
+        """Obtain predictions and prediction sets
+        Args:
+            X: array-like, shape = [n_samples, n_features];
+                Testing set vectors, where n_samples is the number
+                of samples and n_features is the number of features.
+        """
+        if self.method == "icp":
+            return self.icp_.predict(X, significance=self.alpha_)
+        elif self.method == "tcp":
+            return self.tcp_.predict(X, significance=self.alpha_)
+        else:
+            raise ValueError(
+                "`self.method` must be in ('icp', 'tcp')"
+            )

{unifiedbooster-0.5.0.dist-info → unifiedbooster-0.7.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unifiedbooster
-Version: 0.5.0
+Version: 0.7.0
 Summary: Unified interface for Gradient Boosted Decision Trees
 Home-page: https://github.com/thierrymoudiki/unifiedbooster
 Author: T. Moudiki

unifiedbooster-0.7.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,23 @@
+unifiedbooster/__init__.py,sha256=8FEkWCZ2tT8xcW46Z0X_BS9_r0kQWVAu37IncLq6QWU,301
+unifiedbooster/gbdt.py,sha256=oAG-dQRY3FG9Tdhdb0iZuupMOAj1_KcGQbp47AHc72o,5175
+unifiedbooster/gbdt_classification.py,sha256=c9MYlPeTjQ4pAy0CZtroid9UfhQAlQVHekCWbbTIMBQ,5798
+unifiedbooster/gbdt_regression.py,sha256=tHi8XJ1jS2LuXdQoRDsTkFK3qt3L-4kQ9IRsMNW37gI,5351
+unifiedbooster/gpoptimization.py,sha256=UoT20E5dfhREiY7Cqo0vCktBzDBRnnG_6Xyg426vdfk,15238
+unifiedbooster/nonconformist/__init__.py,sha256=rETO9FfHb_yWs4ttLa2FJb2NAy-KFnyESeBTltDwJQA,739
+unifiedbooster/nonconformist/acp.py,sha256=SrfBVCWjXvntkBJ7GXTFYE6i6NU3Pv-5ibwhpItDKDw,11553
+unifiedbooster/nonconformist/base.py,sha256=Ycyt6pwxo0QjD3qBAfDqjzFvFfknIMkX0_yIc6EtPFo,4028
+unifiedbooster/nonconformist/cp.py,sha256=YKiBFKwvaJbWnJcgi-saiVD_2ci-LBDHgytf70jHvFg,6174
+unifiedbooster/nonconformist/evaluation.py,sha256=b24buhhW3v3CKRSi69WKCq9Sb38Unmjr8TAZr66Cdns,15906
+unifiedbooster/nonconformist/icp.py,sha256=wqOaoy22KiF2ebVQOjp8MR-zvEjT0hE0NiMfeNZOQEw,15982
+unifiedbooster/nonconformist/nc.py,sha256=_ED8Yn068Ivio9Xr0SjwKh4Ts5MfUACZFY40ObxPJ60,19644
+unifiedbooster/nonconformist/util.py,sha256=UBKlAEb0mj9MVWBOKCRAq_OQP5Z53FMqWlTyo7RIg5Q,242
+unifiedbooster/predictioninterval/__init__.py,sha256=I1X1omp6Bsuzfm7z8TCSICe2175rHrdoXWEDOicOP8U,85
+unifiedbooster/predictioninterval/predictioninterval.py,sha256=6XQnJQDpsWG-uu5yFxeZQewnrErAjZLzv21YvtarXZQ,11164
+unifiedbooster/predictionset/__init__.py,sha256=IGhWVX8-VeZ15HeLFWu8QeKCz7DIE4TlEXMjTnB3VdE,70
+unifiedbooster/predictionset/predictionset.py,sha256=k9s2PYK2KvOfDoGfSGXUHwwNA9kL2VYiT2JPokwZ8YA,3415
+unifiedbooster-0.7.0.dist-info/LICENSE,sha256=3rWw63btcdqbC0XMnpzCQhxDP8Vx7yKkKS7EDgJiY_4,1061
+unifiedbooster-0.7.0.dist-info/METADATA,sha256=7vR-c8aCOeF-96Uv9uBTugKmA-QC71b_5NyejATpnDM,955
+unifiedbooster-0.7.0.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
+unifiedbooster-0.7.0.dist-info/entry_points.txt,sha256=OVNTsCzMYnaJ11WIByB7G8Lym_dj-ERKZyQxWFUcW30,59
+unifiedbooster-0.7.0.dist-info/top_level.txt,sha256=gOMxxpRtx8_nJXTWsXJDFkNeCsjSJQPs6aUXKK5_nI4,15
+unifiedbooster-0.7.0.dist-info/RECORD,,

{unifiedbooster-0.5.0.dist-info → unifiedbooster-0.7.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (72.1.0)
+Generator: setuptools (74.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

unifiedbooster-0.5.0.dist-info/RECORD DELETED Viewed

@@ -1,11 +0,0 @@
-unifiedbooster/__init__.py,sha256=8FEkWCZ2tT8xcW46Z0X_BS9_r0kQWVAu37IncLq6QWU,301
-unifiedbooster/gbdt.py,sha256=u5Sjw-V8BlDS4LUo_SNOfuz66EFcJhP1Al6Es41R_X8,4932
-unifiedbooster/gbdt_classification.py,sha256=wifw86cUvsyiKSz8MTxIgH6j7Gd1voIxXUiJVsE68bk,4219
-unifiedbooster/gbdt_regression.py,sha256=YQIDtW4hV7DxHAHuoMMkD1aRy0dzVXxx2rwPu3InTA8,3710
-unifiedbooster/gpoptimization.py,sha256=S-yZI8qI_QZyoCqWj8MT0a2Djlo3YrYRjyXApLS9FXM,12831
-unifiedbooster-0.5.0.dist-info/LICENSE,sha256=3rWw63btcdqbC0XMnpzCQhxDP8Vx7yKkKS7EDgJiY_4,1061
-unifiedbooster-0.5.0.dist-info/METADATA,sha256=mao-q4w_f26KVwKSy4ZPEJBZQIRARtXsWEN7t7JEwRw,955
-unifiedbooster-0.5.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-unifiedbooster-0.5.0.dist-info/entry_points.txt,sha256=OVNTsCzMYnaJ11WIByB7G8Lym_dj-ERKZyQxWFUcW30,59
-unifiedbooster-0.5.0.dist-info/top_level.txt,sha256=gOMxxpRtx8_nJXTWsXJDFkNeCsjSJQPs6aUXKK5_nI4,15
-unifiedbooster-0.5.0.dist-info/RECORD,,

{unifiedbooster-0.5.0.dist-info → unifiedbooster-0.7.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{unifiedbooster-0.5.0.dist-info → unifiedbooster-0.7.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{unifiedbooster-0.5.0.dist-info → unifiedbooster-0.7.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

unifiedbooster 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

unifiedbooster 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl