unifiedbooster 0.4.2__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry; it is provided for informational purposes only.
- unifiedbooster/gbdt.py +3 -1
- unifiedbooster/gbdt_classification.py +36 -36
- unifiedbooster/gbdt_regression.py +36 -36
- unifiedbooster/gpoptimization.py +127 -63
- unifiedbooster/nonconformist/__init__.py +30 -0
- unifiedbooster/nonconformist/acp.py +381 -0
- unifiedbooster/nonconformist/base.py +156 -0
- unifiedbooster/nonconformist/cp.py +172 -0
- unifiedbooster/nonconformist/evaluation.py +486 -0
- unifiedbooster/nonconformist/icp.py +442 -0
- unifiedbooster/nonconformist/nc.py +610 -0
- unifiedbooster/nonconformist/util.py +9 -0
- {unifiedbooster-0.4.2.dist-info → unifiedbooster-0.6.0.dist-info}/METADATA +1 -1
- unifiedbooster-0.6.0.dist-info/RECORD +19 -0
- unifiedbooster-0.4.2.dist-info/RECORD +0 -11
- {unifiedbooster-0.4.2.dist-info → unifiedbooster-0.6.0.dist-info}/LICENSE +0 -0
- {unifiedbooster-0.4.2.dist-info → unifiedbooster-0.6.0.dist-info}/WHEEL +0 -0
- {unifiedbooster-0.4.2.dist-info → unifiedbooster-0.6.0.dist-info}/entry_points.txt +0 -0
- {unifiedbooster-0.4.2.dist-info → unifiedbooster-0.6.0.dist-info}/top_level.txt +0 -0
unifiedbooster/gbdt.py  CHANGED

@@ -90,7 +90,9 @@ class GBDT(BaseEstimator):
                 "depth": self.max_depth,
                 "verbose": self.verbose,
                 "random_seed": self.seed,
-                "
+                "boosting_type": "Plain",
+                "leaf_estimation_iterations": 1,
+                "bootstrap_type": "Bernoulli",
                 **kwargs,
             }
         elif self.model_type == "gradientboosting":
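Aside on the three new CatBoost keys: they are standard `catboost` constructor options that trade a little accuracy for faster, simpler fits. A minimal sketch of the equivalent direct construction, with hypothetical values for the fields that come from the estimator's attributes:

```python
# Sketch only: mirrors the parameter dict assembled in gbdt.py;
# the depth/verbose/seed values are hypothetical, not from this diff.
from catboost import CatBoostClassifier

clf = CatBoostClassifier(
    depth=3,                       # from self.max_depth
    verbose=0,                     # from self.verbose
    random_seed=123,               # from self.seed
    boosting_type="Plain",         # plain scheme instead of the "Ordered" default used on small data
    leaf_estimation_iterations=1,  # a single gradient step when computing leaf values
    bootstrap_type="Bernoulli",    # row subsampling via an independent Bernoulli draw per row
)
```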
unifiedbooster/gbdt_classification.py  CHANGED

@@ -51,42 +51,42 @@ class GBDTClassifier(GBDT, ClassifierMixin):
 
     Examples:
 
-    [36 removed lines; their content is not rendered in the diff viewer]
+    ```python
+    import unifiedbooster as ub
+    from sklearn.datasets import load_iris
+    from sklearn.model_selection import train_test_split
+    from sklearn.metrics import accuracy_score
+
+    # Load dataset
+    iris = load_iris()
+    X, y = iris.data, iris.target
+
+    # Split dataset into training and testing sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+    # Initialize the unified classifiers (example with XGBoost and LightGBM)
+    classifier1 = ub.GBDTClassifier(model_type='xgboost')
+    #classifier2 = ub.GBDTClassifier(model_type='catboost')
+    classifier3 = ub.GBDTClassifier(model_type='lightgbm')
+
+    # Fit the models
+    classifier1.fit(X_train, y_train)
+    #classifier2.fit(X_train, y_train)
+    classifier3.fit(X_train, y_train)
+
+    # Predict on the test set
+    y_pred1 = classifier1.predict(X_test)
+    #y_pred2 = classifier2.predict(X_test)
+    y_pred3 = classifier3.predict(X_test)
+
+    # Evaluate the models
+    accuracy1 = accuracy_score(y_test, y_pred1)
+    #accuracy2 = accuracy_score(y_test, y_pred2)
+    accuracy3 = accuracy_score(y_test, y_pred3)
+    print(f"Classification Accuracy xgboost: {accuracy1:.2f}")
+    #print(f"Classification Accuracy catboost: {accuracy2:.2f}")
+    print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
+    ```
     """
 
     def __init__(
unifiedbooster/gbdt_regression.py  CHANGED

@@ -51,42 +51,42 @@ class GBDTRegressor(GBDT, RegressorMixin):
 
     Examples:
 
-    [36 removed lines; their content is not rendered in the diff viewer]
+    ```python
+    import unifiedbooster as ub
+    from sklearn.datasets import fetch_california_housing
+    from sklearn.model_selection import train_test_split
+    from sklearn.metrics import mean_squared_error
+
+    # Load dataset
+    housing = fetch_california_housing()
+    X, y = housing.data, housing.target
+
+    # Split dataset into training and testing sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+    # Initialize the unified regressors (example with XGBoost and LightGBM)
+    regressor1 = ub.GBDTRegressor(model_type='xgboost')
+    #regressor2 = ub.GBDTRegressor(model_type='catboost')
+    regressor3 = ub.GBDTRegressor(model_type='lightgbm')
+
+    # Fit the models
+    regressor1.fit(X_train, y_train)
+    #regressor2.fit(X_train, y_train)
+    regressor3.fit(X_train, y_train)
+
+    # Predict on the test set
+    y_pred1 = regressor1.predict(X_test)
+    #y_pred2 = regressor2.predict(X_test)
+    y_pred3 = regressor3.predict(X_test)
+
+    # Evaluate the models
+    mse1 = mean_squared_error(y_test, y_pred1)
+    #mse2 = mean_squared_error(y_test, y_pred2)
+    mse3 = mean_squared_error(y_test, y_pred3)
+    print(f"Regression Mean Squared Error xgboost: {mse1:.2f}")
+    #print(f"Regression Mean Squared Error catboost: {mse2:.2f}")
+    print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}")
+    ```
     """
 
     def __init__(
unifiedbooster/gpoptimization.py  CHANGED

@@ -18,7 +18,7 @@ def cross_val_optim(
     model_type="xgboost",
     type_fit="classification",
     scoring="accuracy",
-    n_estimators=
+    n_estimators=None,
     surrogate_obj=None,
     cv=5,
     n_jobs=None,
@@ -59,7 +59,7 @@ def cross_val_optim(
         scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
 
     n_estimators: int
-        maximum number of trees that can be built
+        maximum number of trees that can be built (default is None; if None, this parameter is tuned)
 
     surrogate_obj: an object;
         An ML model for estimating the uncertainty around the objective function
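Given the new default, a hedged sketch of calling `cross_val_optim` with the tree count left to the tuner (keyword names are the ones visible in this diff; the positional `X_train, y_train` ordering is assumed):

```python
import unifiedbooster as ub
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# With n_estimators=None (the new default), the number of trees joins the search space
res = ub.cross_val_optim(
    X_train,
    y_train,
    model_type="xgboost",
    type_fit="classification",
    scoring="accuracy",
    n_estimators=None,
    cv=5,
    seed=123,
)
print(res.best_params)  # now also contains a tuned, decoded n_estimators
```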
@@ -168,63 +168,121 @@ def cross_val_optim(
     ).mean()
 
     # objective function for hyperparams tuning
-    [16 removed lines; their content is not rendered in the diff viewer]
+    if n_estimators is not None:
+        def crossval_objective(xx):
+            return gbdt_cv(
+                X_train=X_train,
+                y_train=y_train,
+                model_type=model_type,
+                n_estimators=n_estimators,
+                learning_rate=10 ** xx[0],
+                max_depth=int(xx[1]),
+                rowsample=xx[2],
+                colsample=xx[3],
+                cv=cv,
+                n_jobs=n_jobs,
+                type_fit=type_fit,
+                scoring=scoring,
+                seed=seed,
+            )
+    else:  # n_estimators is None
+        def crossval_objective(xx):
+            return gbdt_cv(
+                X_train=X_train,
+                y_train=y_train,
+                model_type=model_type,
+                n_estimators=int(10 ** xx[4]),
+                learning_rate=10 ** xx[0],
+                max_depth=int(xx[1]),
+                rowsample=xx[2],
+                colsample=xx[3],
+                cv=cv,
+                n_jobs=n_jobs,
+                type_fit=type_fit,
+                scoring=scoring,
+                seed=seed,
+            )
 
-    if
-    [35 further removed lines; their content is not rendered in the diff viewer]
+    if n_estimators is not None:
+        if surrogate_obj is None:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5]),
+                upper_bound=np.array([0, 16, 1.0, 1.0]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                ],
+                method="bayesian",
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
+        else:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5]),
+                upper_bound=np.array([0, 16, 1.0, 1.0]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                ],
+                acquisition="ucb",
+                method="splitconformal",
+                surrogate_obj=ns.PredictionInterval(
+                    obj=surrogate_obj, method="splitconformal"
+                ),
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
+    else:  # n_estimators is None
+        if surrogate_obj is None:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5, 2]),
+                upper_bound=np.array([0, 16, 1.0, 1.0, 3]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                    "n_estimators"
+                ],
+                method="bayesian",
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
+        else:
+            gp_opt = gp.GPOpt(
+                objective_func=crossval_objective,
+                lower_bound=np.array([-6, 1, 0.5, 0.5, 2]),
+                upper_bound=np.array([0, 16, 1.0, 1.0, 3]),
+                params_names=[
+                    "learning_rate",
+                    "max_depth",
+                    "rowsample",
+                    "colsample",
+                    "n_estimators"
+                ],
+                acquisition="ucb",
+                method="splitconformal",
+                surrogate_obj=ns.PredictionInterval(
+                    obj=surrogate_obj, method="splitconformal"
+                ),
+                n_init=n_init,
+                n_iter=n_iter,
+                seed=seed,
+            )
 
     res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
     res.best_params["model_type"] = model_type
-    res.best_params["n_estimators"] = int(n_estimators)
+    res.best_params["n_estimators"] = int(n_estimators) if n_estimators is not None else int(10 ** res.best_params["n_estimators"])
     res.best_params["learning_rate"] = 10 ** res.best_params["learning_rate"]
     res.best_params["max_depth"] = int(res.best_params["max_depth"])
     res.best_params["rowsample"] = res.best_params["rowsample"]
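The two branches differ only in the optional fifth search coordinate, and every coordinate is decoded before being handed to `gbdt_cv`. A small illustrative sketch of that decoding under the bounds above (the candidate vector `xx` itself would come from the GPOpt optimizer, not from user code):

```python
import numpy as np

# Bounds when n_estimators is tuned: lower [-6, 1, 0.5, 0.5, 2], upper [0, 16, 1.0, 1.0, 3]
xx = np.array([-2.0, 7.3, 0.8, 0.9, 2.5])  # an illustrative candidate, not real optimizer output

learning_rate = 10 ** xx[0]      # 0.01 -- searched on a log scale over [1e-6, 1]
max_depth = int(xx[1])           # 7
rowsample = xx[2]                # 0.8
colsample = xx[3]                # 0.9
n_estimators = int(10 ** xx[4])  # 316 -- i.e. between 10**2 = 100 and 10**3 = 1000 trees
```

The same decoding is applied to `res.best_params` after optimization, which is why the `n_estimators` entry is exponentiated only when it was tuned.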
@@ -256,7 +314,7 @@ def lazy_cross_val_optim(
     type_fit="classification",
     scoring="accuracy",
     customize=False,
-    n_estimators=
+    n_estimators=None,
     cv=5,
     n_jobs=None,
     n_init=10,
@@ -299,7 +357,7 @@ def lazy_cross_val_optim(
         if True, the surrogate is transformed into a quasi-randomized network (default is False)
 
     n_estimators: int
-        maximum number of trees that can be built
+        maximum number of trees that can be built (default is None; if None, this parameter is tuned)
 
     cv: int;
         number of cross-validation folds
@@ -325,9 +383,14 @@ def lazy_cross_val_optim(
     Examples:
 
     ```python
+    import os
     import unifiedbooster as ub
     from sklearn.datasets import load_breast_cancer
     from sklearn.model_selection import train_test_split
+    from sklearn.metrics import accuracy_score
+    from time import time
+
+    print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
 
     dataset = load_breast_cancer()
     X, y = dataset.data, dataset.target

@@ -335,25 +398,26 @@ def lazy_cross_val_optim(
         X, y, test_size=0.2, random_state=42
     )
 
-    [1 removed line; its content is not rendered in the diff viewer]
+    start = time()
+    res4 = ub.lazy_cross_val_optim(
         X_train,
         y_train,
-        X_test=
-        y_test=
+        X_test=X_test,
+        y_test=y_test,
         model_type="lightgbm",
         type_fit="classification",
         scoring="accuracy",
         n_estimators=100,
-        surrogate_obj=None,
         cv=5,
         n_jobs=None,
         n_init=10,
         n_iter=190,
         abs_tol=1e-3,
-        verbose=2,
         seed=123,
+        customize=False
     )
-    print(
+    print(f"Elapsed: {time()-start}")
+    print(res4)
     ```
     """
unifiedbooster/nonconformist/__init__.py  ADDED

@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+
+"""
+docstring
+"""
+
+# Authors: Henrik Linusson
+# Yaniv Romano modified np.py file to include CQR
+# T. Moudiki modified __init__.py to import classes
+
+# __version__ = '2.1.0'
+
+from .nc import (
+    AbsErrorErrFunc,
+    QuantileRegErrFunc,
+    RegressorNc,
+    RegressorNormalizer,
+)
+from .cp import IcpRegressor, TcpClassifier
+from .icp import IcpClassifier
+from .base import RegressorAdapter
+
+__all__ = [
+    "AbsErrorErrFunc",
+    "QuantileRegErrFunc",
+    "RegressorAdapter",
+    "RegressorNc",
+    "RegressorNormalizer",
+    "IcpRegressor",
+]
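The new `unifiedbooster/nonconformist` subpackage (vendored from Henrik Linusson's nonconformist, per the header comments) provides conformal-prediction utilities. For orientation, a hedged sketch of the classic inductive conformal regression workflow these exports support, assuming the vendored copy keeps the upstream `fit`/`calibrate`/`predict` interface (the base regressor and split sizes below are illustrative, not from this diff):

```python
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from unifiedbooster.nonconformist import (
    AbsErrorErrFunc, IcpRegressor, RegressorAdapter, RegressorNc,
)

X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
# Split the training data into a proper training set and a calibration set
X_fit, X_cal, y_fit, y_cal = train_test_split(X_train, y_train, random_state=42)

# Nonconformity score = absolute residual of the wrapped model
nc = RegressorNc(RegressorAdapter(RandomForestRegressor()), AbsErrorErrFunc())
icp = IcpRegressor(nc)
icp.fit(X_fit, y_fit)          # fit the underlying model on the proper training split
icp.calibrate(X_cal, y_cal)    # compute calibration scores on held-out data
intervals = icp.predict(X_test, significance=0.1)  # ~90% prediction intervals, shape (n, 2)
print(intervals[:3])
```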