PyPI - unifiedbooster - Versions diffs - 0.9.0__py3-none-any.whl - Mend

unifiedbooster 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

unifiedbooster/__init__.py +12 -0
unifiedbooster/gbdt.py +161 -0
unifiedbooster/gbdt_classification.py +188 -0
unifiedbooster/gbdt_regression.py +180 -0
unifiedbooster/gpoptimization.py +493 -0
unifiedbooster/nonconformist/__init__.py +36 -0
unifiedbooster/nonconformist/acp.py +381 -0
unifiedbooster/nonconformist/base.py +156 -0
unifiedbooster/nonconformist/cp.py +172 -0
unifiedbooster/nonconformist/evaluation.py +486 -0
unifiedbooster/nonconformist/icp.py +442 -0
unifiedbooster/nonconformist/nc.py +610 -0
unifiedbooster/nonconformist/util.py +9 -0
unifiedbooster/predictioninterval/__init__.py +3 -0
unifiedbooster/predictioninterval/predictioninterval.py +314 -0
unifiedbooster/predictionset/__init__.py +3 -0
unifiedbooster/predictionset/predictionset.py +113 -0
unifiedbooster-0.9.0.dist-info/METADATA +39 -0
unifiedbooster-0.9.0.dist-info/RECORD +23 -0
unifiedbooster-0.9.0.dist-info/WHEEL +5 -0
unifiedbooster-0.9.0.dist-info/entry_points.txt +2 -0
unifiedbooster-0.9.0.dist-info/licenses/LICENSE +7 -0
unifiedbooster-0.9.0.dist-info/top_level.txt +1 -0

unifiedbooster/predictioninterval/predictioninterval.py ADDED Viewed

@@ -0,0 +1,314 @@
+from locale import normalize
+import numpy as np
+import pickle
+from collections import namedtuple
+from sklearn.base import BaseEstimator, RegressorMixin
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import ExtraTreesRegressor
+from sklearn.preprocessing import StandardScaler
+from sklearn.neighbors import KernelDensity
+from sklearn.model_selection import GridSearchCV
+from scipy.stats import gaussian_kde
+from tqdm import tqdm
+from ..nonconformist import IcpRegressor
+from ..nonconformist import RegressorNc
+from ..nonconformist import RegressorNormalizer, AbsErrorErrFunc
+class PredictionInterval(BaseEstimator, RegressorMixin):
+    """Class PredictionInterval: Obtain prediction intervals.
+    Attributes:
+        obj: an object;
+            fitted object containing methods `fit` and `predict`
+        method: a string;
+            method for constructing the prediction intervals.
+            Currently "splitconformal" (default) and "localconformal"
+        level: a float;
+            Confidence level for prediction intervals. Default is 95,
+            equivalent to a miscoverage error of 5 (%)
+        replications: an integer;
+            Number of replications for simulated conformal (default is `None`)
+        type_pi: a string;
+            type of prediction interval: currently "kde" (default) or "bootstrap"
+        type_split: a string;
+            "random" (random split of data) or "sequential" (sequential split of data)
+        seed: an integer;
+            Reproducibility of fit (there's a random split between fitting and calibration data)
+    """
+    def __init__(
+        self,
+        obj,
+        method="splitconformal",
+        level=95,
+        type_pi="bootstrap",
+        type_split="random",
+        replications=None,
+        kernel=None,
+        agg="mean",
+        seed=123,
+    ):
+        self.obj = obj
+        self.method = method
+        self.level = level
+        self.type_pi = type_pi
+        self.type_split = type_split
+        self.replications = replications
+        self.kernel = kernel
+        self.agg = agg
+        self.seed = seed
+        self.alpha_ = 1 - self.level / 100
+        self.quantile_ = None
+        self.icp_ = None
+        self.calibrated_residuals_ = None
+        self.scaled_calibrated_residuals_ = None
+        self.calibrated_residuals_scaler_ = None
+        self.kde_ = None
+    def fit(self, X, y):
+        """Fit the `method` to training data (X, y).
+        Args:
+            X: array-like, shape = [n_samples, n_features];
+                Training set vectors, where n_samples is the number
+                of samples and n_features is the number of features.
+            y: array-like, shape = [n_samples, ]; Target values.
+        """
+        if self.type_split == "random":
+            X_train, X_calibration, y_train, y_calibration = train_test_split(
+                X, y, test_size=0.5, random_state=self.seed
+            )
+        elif self.type_split == "sequential":
+            n_x = X.shape[0]
+            n_x_half = n_x // 2
+            first_half_idx = range(0, n_x_half)
+            second_half_idx = range(n_x_half, n_x)
+            X_train = X[first_half_idx, :]
+            X_calibration = X[second_half_idx, :]
+            y_train = y[first_half_idx]
+            y_calibration = y[second_half_idx]
+        if self.method == "splitconformal":
+            n_samples_calibration = X_calibration.shape[0]
+            self.obj.fit(X_train, y_train)
+            preds_calibration = self.obj.predict(X_calibration)
+            self.calibrated_residuals_ = y_calibration - preds_calibration
+            absolute_residuals = np.abs(self.calibrated_residuals_)
+            self.calibrated_residuals_scaler_ = StandardScaler(
+                with_mean=True, with_std=True
+            )
+            self.scaled_calibrated_residuals_ = (
+                self.calibrated_residuals_scaler_.fit_transform(
+                    self.calibrated_residuals_.reshape(-1, 1)
+                ).ravel()
+            )
+            try:
+                # numpy version >= 1.22
+                self.quantile_ = np.quantile(
+                    a=absolute_residuals, q=self.level / 100, method="higher"
+                )
+            except:
+                # numpy version < 1.22
+                self.quantile_ = np.quantile(
+                    a=absolute_residuals,
+                    q=self.level / 100,
+                    interpolation="higher",
+                )
+        if self.method == "localconformal":
+            mad_estimator = ExtraTreesRegressor()
+            normalizer = RegressorNormalizer(
+                self.obj, mad_estimator, AbsErrorErrFunc()
+            )
+            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
+            self.icp_ = IcpRegressor(nc)
+            self.icp_.fit(X_train, y_train)
+            self.icp_.calibrate(X_calibration, y_calibration)
+        return self
+    def predict(self, X, return_pi=False):
+        """Obtain predictions and prediction intervals
+        Args:
+            X: array-like, shape = [n_samples, n_features];
+                Testing set vectors, where n_samples is the number
+                of samples and n_features is the number of features.
+            return_pi: boolean
+                Whether the prediction interval is returned or not.
+                Default is False, for compatibility with other _estimators_.
+                If True, a tuple containing the predictions + lower and upper
+                bounds is returned.
+        """
+        pred = self.obj.predict(X)
+        if self.method == "splitconformal":
+            if self.replications is None:
+                if return_pi:
+                    DescribeResult = namedtuple(
+                        "DescribeResult", ("mean", "lower", "upper")
+                    )
+                    return DescribeResult(
+                        pred, pred - self.quantile_, pred + self.quantile_
+                    )
+                else:
+                    return pred
+            else:  # if self.replications is not None
+                assert self.type_pi in (
+                    "bootstrap",
+                    "kde",
+                ), "`self.type_pi` must be in ('bootstrap', 'kde')"
+                if self.type_pi == "bootstrap":
+                    np.random.seed(self.seed)
+                    self.residuals_sims_ = np.asarray(
+                        [
+                            np.random.choice(
+                                a=self.scaled_calibrated_residuals_,
+                                size=X.shape[0],
+                            )
+                            for _ in range(self.replications)
+                        ]
+                    ).T
+                    self.sims_ = np.asarray(
+                        [
+                            pred
+                            + self.calibrated_residuals_scaler_.scale_[0]
+                            * self.residuals_sims_[:, i].ravel()
+                            for i in range(self.replications)
+                        ]
+                    ).T
+                elif self.type_pi == "kde":
+                    self.kde_ = gaussian_kde(
+                        dataset=self.scaled_calibrated_residuals_
+                    )
+                    self.sims_ = np.asarray(
+                        [
+                            pred
+                            + self.calibrated_residuals_scaler_.scale_[0]
+                            * self.kde_.resample(
+                                size=X.shape[0], seed=self.seed + i
+                            ).ravel()
+                            for i in range(self.replications)
+                        ]
+                    ).T
+                self.mean_ = np.mean(self.sims_, axis=1)
+                self.lower_ = np.quantile(
+                    self.sims_, q=self.alpha_ / 200, axis=1
+                )
+                self.upper_ = np.quantile(
+                    self.sims_, q=1 - self.alpha_ / 200, axis=1
+                )
+                DescribeResult = namedtuple(
+                    "DescribeResult", ("mean", "sims", "lower", "upper")
+                )
+                return DescribeResult(
+                    self.mean_, self.sims_, self.lower_, self.upper_
+                )
+        if self.method == "localconformal":
+            if self.replications is None:
+                if return_pi:
+                    predictions_bounds = self.icp_.predict(
+                        X, significance=1 - self.level
+                    )
+                    DescribeResult = namedtuple(
+                        "DescribeResult", ("mean", "lower", "upper")
+                    )
+                    return DescribeResult(
+                        pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
+                    )
+                else:
+                    return pred
+            else:  # if self.replications is not None
+                assert self.type_pi in (
+                    "bootstrap",
+                    "kde",
+                ), "`self.type_pi` must be in ('bootstrap', 'kde')"
+                if self.type_pi == "bootstrap":
+                    np.random.seed(self.seed)
+                    self.residuals_sims_ = np.asarray(
+                        [
+                            np.random.choice(
+                                a=self.scaled_calibrated_residuals_,
+                                size=X.shape[0],
+                            )
+                            for _ in range(self.replications)
+                        ]
+                    ).T
+                    self.sims_ = np.asarray(
+                        [
+                            pred
+                            + self.calibrated_residuals_scaler_.scale_[0]
+                            * self.residuals_sims_[:, i].ravel()
+                            for i in tqdm(range(self.replications))
+                        ]
+                    ).T
+                elif self.type_pi == "kde":
+                    self.kde_ = gaussian_kde(
+                        dataset=self.scaled_calibrated_residuals_
+                    )
+                    self.sims_ = np.asarray(
+                        [
+                            pred
+                            + self.calibrated_residuals_scaler_.scale_[0]
+                            * self.kde_.resample(
+                                size=X.shape[0], seed=self.seed + i
+                            ).ravel()
+                            for i in tqdm(range(self.replications))
+                        ]
+                    ).T
+                self.mean_ = np.mean(self.sims_, axis=1)
+                self.lower_ = np.quantile(
+                    self.sims_, q=self.alpha_ / 200, axis=1
+                )
+                self.upper_ = np.quantile(
+                    self.sims_, q=1 - self.alpha_ / 200, axis=1
+                )
+                DescribeResult = namedtuple(
+                    "DescribeResult", ("mean", "sims", "lower", "upper")
+                )
+                return DescribeResult(
+                    self.mean_, self.sims_, self.lower_, self.upper_
+                )

unifiedbooster/predictionset/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .predictionset import PredictionSet
+__all__ = ["PredictionSet"]

unifiedbooster/predictionset/predictionset.py ADDED Viewed

@@ -0,0 +1,113 @@
+from locale import normalize
+import numpy as np
+import pickle
+from collections import namedtuple
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import ExtraTreesRegressor
+from sklearn.preprocessing import StandardScaler
+from scipy.stats import gaussian_kde
+from tqdm import tqdm
+from ..nonconformist import (
+    ClassifierAdapter,
+    IcpClassifier,
+    TcpClassifier,
+    ClassifierNc,
+    MarginErrFunc,
+)
+class PredictionSet(BaseEstimator, ClassifierMixin):
+    """Class PredictionSet: Obtain prediction sets.
+    Attributes:
+        obj: an object;
+            fitted object containing methods `fit` and `predict`
+        method: a string;
+            method for constructing the prediction sets.
+            Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)
+        level: a float;
+            Confidence level for prediction sets. Default is None,
+            95 is equivalent to a miscoverage error of 5 (%)
+        seed: an integer;
+            Reproducibility of fit (there's a random split between fitting and calibration data)
+    """
+    def __init__(
+        self,
+        obj,
+        method="icp",
+        level=None,
+        seed=123,
+    ):
+        self.obj = obj
+        self.method = method
+        self.level = level
+        self.seed = seed
+        if self.level is not None:
+            self.alpha_ = 1 - self.level / 100
+        self.quantile_ = None
+        self.icp_ = None
+        self.tcp_ = None
+        if self.method == "icp":
+            self.icp_ = IcpClassifier(
+                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
+            )
+        elif self.method == "tcp":
+            self.tcp_ = TcpClassifier(
+                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
+            )
+        else:
+            raise ValueError("`self.method` must be in ('icp', 'tcp')")
+    def fit(self, X, y):
+        """Fit the `method` to training data (X, y).
+        Args:
+            X: array-like, shape = [n_samples, n_features];
+                Training set vectors, where n_samples is the number
+                of samples and n_features is the number of features.
+            y: array-like, shape = [n_samples, ]; Target values.
+        """
+        if self.method == "icp":
+            X_train, X_calibration, y_train, y_calibration = train_test_split(
+                X, y, test_size=0.5, random_state=self.seed
+            )
+            self.icp_.fit(X_train, y_train)
+            self.icp_.calibrate(X_calibration, y_calibration)
+        elif self.method == "tcp":
+            self.tcp_.fit(X, y)
+        return self
+    def predict(self, X):
+        """Obtain predictions and prediction sets
+        Args:
+            X: array-like, shape = [n_samples, n_features];
+                Testing set vectors, where n_samples is the number
+                of samples and n_features is the number of features.
+        """
+        if self.method == "icp":
+            return self.icp_.predict(X, significance=self.alpha_)
+        elif self.method == "tcp":
+            return self.tcp_.predict(X, significance=self.alpha_)
+        else:
+            raise ValueError("`self.method` must be in ('icp', 'tcp')")

unifiedbooster-0.9.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,39 @@
+Metadata-Version: 2.4
+Name: unifiedbooster
+Version: 0.9.0
+Summary: Unified interface for Gradient Boosted Decision Trees
+Home-page: https://github.com/thierrymoudiki/unifiedbooster
+Author: T. Moudiki
+Author-email: thierry.moudiki@gmail.com
+License: BSD license
+Keywords: unifiedbooster
+Classifier: Development Status :: 2 - Pre-Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Natural Language :: English
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Requires-Python: >=3.6
+License-File: LICENSE
+Requires-Dist: Cython
+Requires-Dist: numpy
+Requires-Dist: scikit-learn
+Requires-Dist: xgboost
+Requires-Dist: lightgbm
+Requires-Dist: GPopt
+Requires-Dist: nnetsauce
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: home-page
+Dynamic: keywords
+Dynamic: license
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+Unified interface for Gradient Boosted Decision Trees

unifiedbooster-0.9.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,23 @@
+unifiedbooster/__init__.py,sha256=8FEkWCZ2tT8xcW46Z0X_BS9_r0kQWVAu37IncLq6QWU,301
+unifiedbooster/gbdt.py,sha256=FSaZngKlFR943cq1QclZlsOFjM6tmX446e6GeoGtA6Q,5176
+unifiedbooster/gbdt_classification.py,sha256=NbmngLNGt4X1xuTIFKDVP4bS5ieAU_rNu_axSpbbJq0,5795
+unifiedbooster/gbdt_regression.py,sha256=5YjnOlPJOBcuYJN5lOFWA8VLYhIIVmkmceRjCJ4GdjA,5722
+unifiedbooster/gpoptimization.py,sha256=UoT20E5dfhREiY7Cqo0vCktBzDBRnnG_6Xyg426vdfk,15238
+unifiedbooster/nonconformist/__init__.py,sha256=sHEakjPhqUhmZwawAv34bHcTDmF1uZvqvGLIMjOM0B0,739
+unifiedbooster/nonconformist/acp.py,sha256=SrfBVCWjXvntkBJ7GXTFYE6i6NU3Pv-5ibwhpItDKDw,11553
+unifiedbooster/nonconformist/base.py,sha256=3nvSL_rL1Kxkj-lI5rEuMuK7fZyfrFqKKS1-UMdcLNA,4024
+unifiedbooster/nonconformist/cp.py,sha256=YKiBFKwvaJbWnJcgi-saiVD_2ci-LBDHgytf70jHvFg,6174
+unifiedbooster/nonconformist/evaluation.py,sha256=b24buhhW3v3CKRSi69WKCq9Sb38Unmjr8TAZr66Cdns,15906
+unifiedbooster/nonconformist/icp.py,sha256=wqOaoy22KiF2ebVQOjp8MR-zvEjT0hE0NiMfeNZOQEw,15982
+unifiedbooster/nonconformist/nc.py,sha256=_ED8Yn068Ivio9Xr0SjwKh4Ts5MfUACZFY40ObxPJ60,19644
+unifiedbooster/nonconformist/util.py,sha256=UBKlAEb0mj9MVWBOKCRAq_OQP5Z53FMqWlTyo7RIg5Q,242
+unifiedbooster/predictioninterval/__init__.py,sha256=I1X1omp6Bsuzfm7z8TCSICe2175rHrdoXWEDOicOP8U,85
+unifiedbooster/predictioninterval/predictioninterval.py,sha256=6XQnJQDpsWG-uu5yFxeZQewnrErAjZLzv21YvtarXZQ,11164
+unifiedbooster/predictionset/__init__.py,sha256=IGhWVX8-VeZ15HeLFWu8QeKCz7DIE4TlEXMjTnB3VdE,70
+unifiedbooster/predictionset/predictionset.py,sha256=C38rC1qAhhXa8YUJjVB3yjYjPXToU1HVXBRoBevsRxk,3308
+unifiedbooster-0.9.0.dist-info/licenses/LICENSE,sha256=3rWw63btcdqbC0XMnpzCQhxDP8Vx7yKkKS7EDgJiY_4,1061
+unifiedbooster-0.9.0.dist-info/METADATA,sha256=0Az8dhNCauLHJYfj55TKsIgBUX_RhZ5_VksqFcNVIMs,1151
+unifiedbooster-0.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+unifiedbooster-0.9.0.dist-info/entry_points.txt,sha256=OVNTsCzMYnaJ11WIByB7G8Lym_dj-ERKZyQxWFUcW30,59
+unifiedbooster-0.9.0.dist-info/top_level.txt,sha256=gOMxxpRtx8_nJXTWsXJDFkNeCsjSJQPs6aUXKK5_nI4,15
+unifiedbooster-0.9.0.dist-info/RECORD,,

unifiedbooster-0.9.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (80.9.0)
+Root-Is-Purelib: true
+Tag: py3-none-any

unifiedbooster-0.9.0.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ unifiedbooster = unifiedbooster.cli:main

unifiedbooster-0.9.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,7 @@
+Copyright <2024> <T. Moudiki>
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

unifiedbooster-0.9.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ unifiedbooster