PyPI - wavetrainer - Versions diffs - 0.0.40__tar.gz → 0.0.42__tar.gz - Mend

wavetrainer 0.0.40.tar.gz → 0.0.42.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

{wavetrainer-0.0.40/wavetrainer.egg-info → wavetrainer-0.0.42}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.0.40
+Version: 0.0.42
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield

{wavetrainer-0.0.40 → wavetrainer-0.0.42}/setup.py RENAMED Viewed

@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
 setup(
     name='wavetrainer',
-    version='0.0.40',
+    version='0.0.42',
     description='A library for automatically finding the optimal model within feature and hyperparameter space.',
     long_description=long_description,
     long_description_content_type='text/markdown',

{wavetrainer-0.0.40 → wavetrainer-0.0.42}/wavetrainer/__init__.py RENAMED Viewed

@@ -2,5 +2,5 @@
 from .create import create
-__VERSION__ = "0.0.40"
+__VERSION__ = "0.0.42"
 __all__ = ("create",)

{wavetrainer-0.0.40 → wavetrainer-0.0.42}/wavetrainer/calibrator/calibrator.py RENAMED Viewed

@@ -1,5 +1,7 @@
 """The prototype calibrator class."""
+import pandas as pd
 from ..fit import Fit
 from ..model.model import Model
 from ..params import Params
@@ -15,3 +17,7 @@ class Calibrator(Params, Fit):
     def name(cls) -> str:
         """The name of the calibrator."""
         raise NotImplementedError("name not implemented in parent class.")
+    def predictions_as_x(self, y: pd.Series | pd.DataFrame | None = None) -> bool:
+        """Whether the calibrator wants predictions as X rather than features."""
+        raise NotImplementedError("predictions_as_x not implemented in parent class.")

{wavetrainer-0.0.40 → wavetrainer-0.0.42}/wavetrainer/calibrator/calibrator_router.py RENAMED Viewed

@@ -36,6 +36,13 @@ class CalibratorRouter(Calibrator):
     def name(cls) -> str:
         return "router"
+    def predictions_as_x(self, y: pd.Series | pd.DataFrame | None = None) -> bool:
+        if y is None:
+            raise ValueError("y is null")
+        if determine_model_type(y) == ModelType.REGRESSION:
+            return False
+        return True
     def set_options(
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
     ) -> None:

{wavetrainer-0.0.40 → wavetrainer-0.0.42}/wavetrainer/calibrator/mapie_calibrator.py RENAMED Viewed

@@ -1,13 +1,11 @@
 """A calibrator that implements MAPIE."""
-import logging
 import os
 from typing import Self
 import joblib  # type: ignore
 import optuna
 import pandas as pd
-import sklearn  # type: ignore
 from mapie.regression import MapieRegressor  # type: ignore
 from ..model.model import PROBABILITY_COLUMN_PREFIX, Model
@@ -23,12 +21,15 @@ class MAPIECalibrator(Calibrator):
     def __init__(self, model: Model):
         super().__init__(model)
-        self._mapie = MapieRegressor(model.estimator, method="plus")
+        self._mapie = MapieRegressor(model.create_estimator(), method="plus")
     @classmethod
     def name(cls) -> str:
         return "mapie"
+    def predictions_as_x(self, y: pd.Series | pd.DataFrame | None = None) -> bool:
+        return False
     def set_options(
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
     ) -> None:
@@ -59,20 +60,18 @@ class MAPIECalibrator(Calibrator):
         return self
     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
-        try:
-            alpha = []
-            for potential_alpha in [0.05, 0.32]:
-                if len(df) > int(1.0 / potential_alpha) + 1:
-                    alpha.append(potential_alpha)
-            if alpha:
-                _, y_pis = self._mapie.predict(df, alpha=alpha)
-                for i in range(y_pis.shape[1]):
-                    if i >= len(alpha):
-                        continue
-                    for ii in range(y_pis.shape[2]):
-                        alpha_val = alpha[i]
-                        values = y_pis[:, i, ii].flatten().tolist()
-                        df[f"{PROBABILITY_COLUMN_PREFIX}{alpha_val}_{ii == 1}"] = values
-        except sklearn.exceptions.NotFittedError as exc:  # type: ignore
-            logging.warning(str(exc))
-        return df
+        alpha = []
+        for potential_alpha in [0.05, 0.32]:
+            if len(df) > int(1.0 / potential_alpha) + 1:
+                alpha.append(potential_alpha)
+        ret_df = pd.DataFrame(index=df.index)
+        if alpha:
+            _, y_pis = self._mapie.predict(df, alpha=alpha)
+            for i in range(y_pis.shape[1]):
+                if i >= len(alpha):
+                    continue
+                for ii in range(y_pis.shape[2]):
+                    alpha_val = alpha[i]
+                    values = y_pis[:, i, ii].flatten().tolist()
+                    ret_df[f"{PROBABILITY_COLUMN_PREFIX}{alpha_val}_{ii == 1}"] = values
+        return ret_df

{wavetrainer-0.0.40 → wavetrainer-0.0.42}/wavetrainer/calibrator/vennabers_calibrator.py RENAMED Viewed

@@ -28,6 +28,9 @@ class VennabersCalibrator(Calibrator):
     def name(cls) -> str:
         return "vennabers"
+    def predictions_as_x(self, y: pd.Series | pd.DataFrame | None = None) -> bool:
+        return True
     def set_options(
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
     ) -> None:

{wavetrainer-0.0.40 → wavetrainer-0.0.42}/wavetrainer/model/catboost/catboost_kwargs.py RENAMED Viewed

@@ -38,12 +38,13 @@ def handle_fit_kwargs(*args, **kwargs) -> tuple[tuple[Any, ...], dict[str, Any]]
         args_list[0] = df[included_columns]
         args = tuple(args_list)
-        eval_x = eval_x[included_columns]
-        kwargs[EVAL_SET_ARG_KEY] = Pool(
-            eval_x,
-            label=eval_y,
-            cat_features=cat_features,
-        )
+        if eval_x is not None:
+            eval_x = eval_x[included_columns]
+            kwargs[EVAL_SET_ARG_KEY] = Pool(
+                eval_x,
+                label=eval_y,
+                cat_features=cat_features,
+            )
         kwargs[CAT_FEATURES_ARG_KEY] = cat_features
         del kwargs[ORIGINAL_X_ARG_KEY]

{wavetrainer-0.0.40 → wavetrainer-0.0.42}/wavetrainer/model/catboost/catboost_model.py RENAMED Viewed

@@ -1,9 +1,10 @@
 """A model that wraps catboost."""
+# pylint: disable=line-too-long
 import json
 import logging
 import os
-from typing import Any, Self
+from typing import Self
 import optuna
 import pandas as pd
@@ -13,8 +14,6 @@ from catboost import CatBoost, Pool  # type: ignore
 from ...model_type import ModelType, determine_model_type
 from ..model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
 from .catboost_classifier_wrap import CatBoostClassifierWrapper
-from .catboost_kwargs import (CAT_FEATURES_ARG_KEY, EVAL_SET_ARG_KEY,
-                              ORIGINAL_X_ARG_KEY)
 from .catboost_regressor_wrap import CatBoostRegressorWrapper
 _MODEL_FILENAME = "model.cbm"
@@ -64,10 +63,6 @@ class CatboostModel(Model):
         self._early_stopping_rounds = None
         self._best_iteration = None
-    @property
-    def estimator(self) -> Any:
-        return self._provide_catboost()
     @property
     def supports_importances(self) -> bool:
         return True
@@ -82,23 +77,11 @@ class CatboostModel(Model):
         importances = importances["Importances"].to_list()  # type: ignore
         return {feature_ids[x]: importances[x] for x in range(len(feature_ids))}
-    def pre_fit(
-        self,
-        df: pd.DataFrame,
-        y: pd.Series | pd.DataFrame | None,
-        eval_x: pd.DataFrame | None = None,
-        eval_y: pd.Series | pd.DataFrame | None = None,
-        w: pd.Series | None = None,
-    ):
-        if y is None:
-            raise ValueError("y is null.")
-        self._model_type = determine_model_type(y)
-        return {
-            EVAL_SET_ARG_KEY: (eval_x, eval_y),
-            CAT_FEATURES_ARG_KEY: df.select_dtypes(include="category").columns.tolist(),
-            ORIGINAL_X_ARG_KEY: df,
-            "sample_weight": w,
-        }
+    def provide_estimator(self):
+        return self._provide_catboost()
+    def create_estimator(self):
+        return self._create_catboost()
     def set_options(
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
@@ -214,66 +197,66 @@ class CatboostModel(Model):
     def _provide_catboost(self) -> CatBoost:
         catboost = self._catboost
         if catboost is None:
-            best_iteration = self._best_iteration
-            iterations = (
-                best_iteration if best_iteration is not None else self._iterations
-            )
-            logging.info(
-                "Creating catboost model with depth %d, boosting type %s, best iteration %d",
-                self._depth,
-                self._boosting_type,
-                -1 if best_iteration is None else best_iteration,
-            )
-            match self._model_type:
-                case ModelType.BINARY:
-                    catboost = CatBoostClassifierWrapper(
-                        iterations=iterations,
-                        learning_rate=self._learning_rate,
-                        depth=self._depth,
-                        l2_leaf_reg=self._l2_leaf_reg,
-                        boosting_type=self._boosting_type,
-                        early_stopping_rounds=self._early_stopping_rounds,
-                        metric_period=100,
-                        task_type="GPU" if torch.cuda.is_available() else "CPU",
-                        devices="0" if torch.cuda.is_available() else None,
-                    )
-                case ModelType.REGRESSION:
-                    catboost = CatBoostRegressorWrapper(
-                        iterations=iterations,
-                        learning_rate=self._learning_rate,
-                        depth=self._depth,
-                        l2_leaf_reg=self._l2_leaf_reg,
-                        boosting_type=self._boosting_type,
-                        early_stopping_rounds=self._early_stopping_rounds,
-                        metric_period=100,
-                        task_type="GPU" if torch.cuda.is_available() else "CPU",
-                        devices="0" if torch.cuda.is_available() else None,
-                    )
-                case ModelType.BINNED_BINARY:
-                    catboost = CatBoostClassifierWrapper(
-                        iterations=iterations,
-                        learning_rate=self._learning_rate,
-                        depth=self._depth,
-                        l2_leaf_reg=self._l2_leaf_reg,
-                        boosting_type=self._boosting_type,
-                        early_stopping_rounds=self._early_stopping_rounds,
-                        metric_period=100,
-                        task_type="GPU" if torch.cuda.is_available() else "CPU",
-                        devices="0" if torch.cuda.is_available() else None,
-                    )
-                case ModelType.MULTI_CLASSIFICATION:
-                    catboost = CatBoostClassifierWrapper(
-                        iterations=iterations,
-                        learning_rate=self._learning_rate,
-                        depth=self._depth,
-                        l2_leaf_reg=self._l2_leaf_reg,
-                        boosting_type=self._boosting_type,
-                        early_stopping_rounds=self._early_stopping_rounds,
-                        metric_period=100,
-                        task_type="GPU" if torch.cuda.is_available() else "CPU",
-                        devices="0" if torch.cuda.is_available() else None,
-                    )
+            catboost = self._create_catboost()
             self._catboost = catboost
         if catboost is None:
             raise ValueError("catboost is null")
         return catboost
+    def _create_catboost(self) -> CatBoost:
+        best_iteration = self._best_iteration
+        iterations = best_iteration if best_iteration is not None else self._iterations
+        print(
+            f"Creating catboost model with depth {self._depth}, boosting type {self._boosting_type}, best iteration {best_iteration}",
+        )
+        match self._model_type:
+            case ModelType.BINARY:
+                return CatBoostClassifierWrapper(
+                    iterations=iterations,
+                    learning_rate=self._learning_rate,
+                    depth=self._depth,
+                    l2_leaf_reg=self._l2_leaf_reg,
+                    boosting_type=self._boosting_type,
+                    early_stopping_rounds=self._early_stopping_rounds,
+                    metric_period=100,
+                    task_type="GPU" if torch.cuda.is_available() else "CPU",
+                    devices="0" if torch.cuda.is_available() else None,
+                )
+            case ModelType.REGRESSION:
+                return CatBoostRegressorWrapper(
+                    iterations=iterations,
+                    learning_rate=self._learning_rate,
+                    depth=self._depth,
+                    l2_leaf_reg=self._l2_leaf_reg,
+                    boosting_type=self._boosting_type,
+                    early_stopping_rounds=self._early_stopping_rounds,
+                    metric_period=100,
+                    task_type="GPU" if torch.cuda.is_available() else "CPU",
+                    devices="0" if torch.cuda.is_available() else None,
+                )
+            case ModelType.BINNED_BINARY:
+                return CatBoostClassifierWrapper(
+                    iterations=iterations,
+                    learning_rate=self._learning_rate,
+                    depth=self._depth,
+                    l2_leaf_reg=self._l2_leaf_reg,
+                    boosting_type=self._boosting_type,
+                    early_stopping_rounds=self._early_stopping_rounds,
+                    metric_period=100,
+                    task_type="GPU" if torch.cuda.is_available() else "CPU",
+                    devices="0" if torch.cuda.is_available() else None,
+                )
+            case ModelType.MULTI_CLASSIFICATION:
+                return CatBoostClassifierWrapper(
+                    iterations=iterations,
+                    learning_rate=self._learning_rate,
+                    depth=self._depth,
+                    l2_leaf_reg=self._l2_leaf_reg,
+                    boosting_type=self._boosting_type,
+                    early_stopping_rounds=self._early_stopping_rounds,
+                    metric_period=100,
+                    task_type="GPU" if torch.cuda.is_available() else "CPU",
+                    devices="0" if torch.cuda.is_available() else None,
+                )
+            case _:
+                raise ValueError(f"Unrecognised model type: {self._model_type}")

{wavetrainer-0.0.40 → wavetrainer-0.0.42}/wavetrainer/model/model.py RENAMED Viewed

@@ -25,11 +25,6 @@ class Model(Params, Fit):
         """Whether the model supports the X values."""
         raise NotImplementedError("supports_x not implemented in parent class.")
-    @property
-    def estimator(self) -> Any:
-        """The estimator backing the model."""
-        raise NotImplementedError("estimator not implemented in parent class.")
     @property
     def supports_importances(self) -> bool:
         """Whether this model supports feature importances."""
@@ -44,13 +39,10 @@ class Model(Params, Fit):
             "feature_importances not implemented in parent class."
         )
-    def pre_fit(
-        self,
-        df: pd.DataFrame,
-        y: pd.Series | pd.DataFrame | None,
-        eval_x: pd.DataFrame | None = None,
-        eval_y: pd.Series | pd.DataFrame | None = None,
-        w: pd.Series | None = None,
-    ) -> dict[str, Any]:
-        """A call to make sure the model is prepared for the target type."""
-        raise NotImplementedError("pre_fit not implemented in parent class.")
+    def provide_estimator(self) -> Any:
+        """Provides the current estimator."""
+        raise NotImplementedError("provides_estimator not implemented in parent class.")
+    def create_estimator(self) -> Any:
+        """Creates a new estimator."""
+        raise NotImplementedError("creates_estimator not implemented in parent class.")

{wavetrainer-0.0.40 → wavetrainer-0.0.42}/wavetrainer/model/model_router.py RENAMED Viewed

@@ -2,7 +2,7 @@
 import json
 import os
-from typing import Any, Self
+from typing import Self
 import optuna
 import pandas as pd
@@ -40,13 +40,6 @@ class ModelRouter(Model):
     def supports_x(cls, df: pd.DataFrame) -> bool:
         return True
-    @property
-    def estimator(self) -> Any:
-        model = self._model
-        if model is None:
-            raise ValueError("model is null")
-        return model.estimator
     @property
     def supports_importances(self) -> bool:
         model = self._model
@@ -61,18 +54,17 @@ class ModelRouter(Model):
             raise ValueError("model is null")
         return model.feature_importances
-    def pre_fit(
-        self,
-        df: pd.DataFrame,
-        y: pd.Series | pd.DataFrame | None,
-        eval_x: pd.DataFrame | None = None,
-        eval_y: pd.Series | pd.DataFrame | None = None,
-        w: pd.Series | None = None,
-    ) -> dict[str, Any]:
+    def provide_estimator(self):
+        model = self._model
+        if model is None:
+            raise ValueError("model is null")
+        return model.provide_estimator()
+    def create_estimator(self):
         model = self._model
         if model is None:
             raise ValueError("model is null")
-        return model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y, w=w)
+        return model.create_estimator()
     def set_options(
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame

{wavetrainer-0.0.40 → wavetrainer-0.0.42}/wavetrainer/model/tabpfn/tabpfn_model.py RENAMED Viewed

@@ -5,7 +5,7 @@ import json
 import logging
 import os
 import pickle
-from typing import Any, Self
+from typing import Self
 import optuna
 import pandas as pd
@@ -42,10 +42,6 @@ class TabPFNModel(Model):
         self._tabpfn = None
         self._model_type = None
-    @property
-    def estimator(self) -> Any:
-        return self._provide_tabpfn()
     @property
     def supports_importances(self) -> bool:
         return False
@@ -54,18 +50,11 @@ class TabPFNModel(Model):
     def feature_importances(self) -> dict[str, float]:
         return {}
-    def pre_fit(
-        self,
-        df: pd.DataFrame,
-        y: pd.Series | pd.DataFrame | None,
-        eval_x: pd.DataFrame | None = None,
-        eval_y: pd.Series | pd.DataFrame | None = None,
-        w: pd.Series | None = None,
-    ):
-        if y is None:
-            raise ValueError("y is null.")
-        self._model_type = determine_model_type(y)
-        return {}
+    def provide_estimator(self):
+        return self._provide_tabpfn()
+    def create_estimator(self):
+        return self._create_tabpfn()
     def set_options(
         self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
@@ -133,29 +122,34 @@ class TabPFNModel(Model):
     def _provide_tabpfn(self) -> AutoTabPFNClassifier | AutoTabPFNRegressor:
         tabpfn = self._tabpfn
         if tabpfn is None:
-            max_time = 1 if pytest_is_running.is_running() else 120
-            match self._model_type:
-                case ModelType.BINARY:
-                    tabpfn = AutoTabPFNClassifier(
-                        max_time=max_time,
-                        device="cuda" if torch.cuda.is_available() else "cpu",
-                    )
-                case ModelType.REGRESSION:
-                    tabpfn = AutoTabPFNRegressor(
-                        max_time=max_time,
-                        device="cuda" if torch.cuda.is_available() else "cpu",
-                    )
-                case ModelType.BINNED_BINARY:
-                    tabpfn = AutoTabPFNClassifier(
-                        max_time=max_time,
-                        device="cuda" if torch.cuda.is_available() else "cpu",
-                    )
-                case ModelType.MULTI_CLASSIFICATION:
-                    tabpfn = AutoTabPFNClassifier(
-                        max_time=max_time,
-                        device="cuda" if torch.cuda.is_available() else "cpu",
-                    )
+            tabpfn = self._create_tabpfn()
             self._tabpfn = tabpfn
         if tabpfn is None:
             raise ValueError("tabpfn is null")
         return tabpfn
+    def _create_tabpfn(self) -> AutoTabPFNClassifier | AutoTabPFNRegressor:
+        max_time = 1 if pytest_is_running.is_running() else 120
+        match self._model_type:
+            case ModelType.BINARY:
+                return AutoTabPFNClassifier(
+                    max_time=max_time,
+                    device="cuda" if torch.cuda.is_available() else "cpu",
+                )
+            case ModelType.REGRESSION:
+                return AutoTabPFNRegressor(
+                    max_time=max_time,
+                    device="cuda" if torch.cuda.is_available() else "cpu",
+                )
+            case ModelType.BINNED_BINARY:
+                return AutoTabPFNClassifier(
+                    max_time=max_time,
+                    device="cuda" if torch.cuda.is_available() else "cpu",
+                )
+            case ModelType.MULTI_CLASSIFICATION:
+                return AutoTabPFNClassifier(
+                    max_time=max_time,
+                    device="cuda" if torch.cuda.is_available() else "cpu",
+                )
+            case _:
+                raise ValueError(f"Unrecognised model type: {self._model_type}")

{wavetrainer-0.0.40 → wavetrainer-0.0.42}/wavetrainer/model/xgboost/xgboost_logger.py RENAMED Viewed

@@ -15,6 +15,10 @@ class XGBoostEpochsLogger(TrainingCallback):
             return False
         log_items = []
         for dataset, metrics in evals_log.items():
+            if dataset == "validation_0":
+                dataset = "validation"
+            elif dataset == "validation_1":
+                dataset = "train"
             for metric_name, values in metrics.items():
                 current_val = values[-1]
                 log_items.append(f"{dataset}-{metric_name}: {current_val:.5f}")

wavetrainer 0.0.40__tar.gz → 0.0.42__tar.gz

wavetrainer 0.0.40.tar.gz → 0.0.42.tar.gz