PyPI - wavetrainer - Versions diffs - 0.0.4__tar.gz → 0.0.6__tar.gz - Mend

wavetrainer 0.0.4 (tar.gz) → 0.0.6 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

{wavetrainer-0.0.4/wavetrainer.egg-info → wavetrainer-0.0.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.0.4
+Version: 0.0.6
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield
@@ -21,7 +21,6 @@ Requires-Dist: scipy>=1.15.2
 Requires-Dist: catboost>=1.2.7
 Requires-Dist: venn-abers>=1.4.6
 Requires-Dist: mapie>=0.9.2
-Requires-Dist: shapiq>=1.2.2
 # wavetrainer
@@ -49,7 +48,6 @@ Python 3.11.6:
 - [catboost](https://catboost.ai/)
 - [venn-abers](https://github.com/ip200/venn-abers)
 - [mapie](https://mapie.readthedocs.io/en/stable/)
-- [shapiq](https://github.com/mmschlk/shapiq)
 ## Raison D'être :thought_balloon:

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/README.md RENAMED Viewed

@@ -24,7 +24,6 @@ Python 3.11.6:
 - [catboost](https://catboost.ai/)
 - [venn-abers](https://github.com/ip200/venn-abers)
 - [mapie](https://mapie.readthedocs.io/en/stable/)
-- [shapiq](https://github.com/mmschlk/shapiq)
 ## Raison D'être :thought_balloon:

wavetrainer-0.0.4/wavetrainer.egg-info/requires.txt → wavetrainer-0.0.6/requirements.txt RENAMED Viewed

@@ -7,5 +7,4 @@ numpy>=1.26.4
 scipy>=1.15.2
 catboost>=1.2.7
 venn-abers>=1.4.6
-mapie>=0.9.2
-shapiq>=1.2.2
+mapie>=0.9.2

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/setup.py RENAMED Viewed

@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
 setup(
     name='wavetrainer',
-    version='0.0.4',
+    version='0.0.6',
     description='A library for automatically finding the optimal model within feature and hyperparameter space.',
     long_description=long_description,
     long_description_content_type='text/markdown',

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/tests/trainer_test.py RENAMED Viewed

@@ -13,7 +13,7 @@ class TestTrainer(unittest.TestCase):
     def test_trainer(self):
         with tempfile.TemporaryDirectory() as tmpdir:
-            trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=1), trials=10)
+            trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=1)
             x_data = [i for i in range(100)]
             x_index = [datetime.datetime(2022, 1, 1) + datetime.timedelta(days=i) for i in range(len(x_data))]
             df = pd.DataFrame(

wavetrainer-0.0.6/wavetrainer/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""The wavetrain main module."""
+from .create import create
+__VERSION__ = "0.0.6"
+__all__ = ("create",)

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/calibrator/calibrator_router.py RENAMED Viewed

@@ -24,6 +24,8 @@ _CALIBRATORS = {
 class CalibratorRouter(Calibrator):
     """A router that routes to a different calibrator class."""
+    # pylint: disable=too-many-positional-arguments,too-many-arguments
     _calibrator: Calibrator | None
     def __init__(self, model: Model):
@@ -66,7 +68,10 @@ class CalibratorRouter(Calibrator):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
+        # pylint: disable=no-else-return
         calibrator: Calibrator | None = None
         if determine_model_type(df) == ModelType.REGRESSION:
             calibrator = MAPIECalibrator(self._model)

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/calibrator/mapie_calibrator.py RENAMED Viewed

@@ -1,11 +1,13 @@
 """A calibrator that implements MAPIE."""
+import logging
 import os
 from typing import Self
 import joblib  # type: ignore
 import optuna
 import pandas as pd
+import sklearn  # type: ignore
 from mapie.regression import MapieRegressor  # type: ignore
 from ..model.model import PROBABILITY_COLUMN_PREFIX, Model
@@ -17,6 +19,8 @@ _CALIBRATOR_FILENAME = "mapie.joblib"
 class MAPIECalibrator(Calibrator):
     """A class that uses MAPIE as a calibrator."""
+    # pylint: disable=too-many-positional-arguments,too-many-arguments
     def __init__(self, model: Model):
         super().__init__(model)
         self._mapie = MapieRegressor(model.estimator, method="plus")
@@ -39,22 +43,34 @@ class MAPIECalibrator(Calibrator):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         mapie = self._mapie
         if mapie is None:
             raise ValueError("mapie is null")
         if y is None:
             raise ValueError("y is null")
+        if len(df) <= 5:
+            return self
         mapie.fit(df.to_numpy(), y.to_numpy())
         return self
     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
-        alpha = [0.05, 0.32]
-        _, y_pis = self._mapie.predict(df, alpha=alpha)
-        df = pd.DataFrame(data=None, index=df.index)
-        for i in range(y_pis.shape[1]):
-            for ii in range(y_pis.shape[2]):
-                df[f"{PROBABILITY_COLUMN_PREFIX}{alpha[i]}_{ii == 1}"] = (
-                    y_pis[:, i, ii].flatten().tolist()
-                )
+        try:
+            alpha = []
+            for potential_alpha in [0.05, 0.32]:
+                if len(df) > int(1.0 / potential_alpha):
+                    alpha.append(potential_alpha)
+            if alpha:
+                _, y_pis = self._mapie.predict(df, alpha=alpha)
+                for i in range(y_pis.shape[1]):
+                    if i >= len(alpha):
+                        continue
+                    for ii in range(y_pis.shape[2]):
+                        alpha_val = alpha[i]
+                        values = y_pis[:, i, ii].flatten().tolist()
+                        df[f"{PROBABILITY_COLUMN_PREFIX}{alpha_val}_{ii == 1}"] = values
+        except sklearn.exceptions.NotFittedError as exc:  # type: ignore
+            logging.warning(str(exc))
         return df

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/calibrator/vennabers_calibrator.py RENAMED Viewed

@@ -17,6 +17,8 @@ _CALIBRATOR_FILENAME = "vennabers.joblib"
 class VennabersCalibrator(Calibrator):
     """A class that uses venn abers as a calibrator."""
+    # pylint: disable=too-many-positional-arguments,too-many-arguments
     def __init__(self, model: Model):
         super().__init__(model)
         self._vennabers = VennAbers()
@@ -39,6 +41,8 @@ class VennabersCalibrator(Calibrator):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         vennabers = self._vennabers
         if vennabers is None:

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/fit.py RENAMED Viewed

@@ -8,11 +8,15 @@ import pandas as pd
 class Fit:
     """The prototype fit class."""
+    # pylint: disable=too-many-positional-arguments,too-many-arguments
     def fit(
         self,
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         """Fit the dataframe."""
         raise NotImplementedError("fit not implemented in parent class.")
@@ -25,6 +29,9 @@ class Fit:
         self,
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
+        w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> pd.DataFrame:
         """Fit and then trasnfrom the dataframe."""
-        return self.fit(df, y=y).transform(df)
+        return self.fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y).transform(df)

wavetrainer-0.0.6/wavetrainer/model/catboost_classifier_wrap.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""A wrapper for catboost classifier to handle some edge cases."""
+# pylint: disable=duplicate-code
+from catboost import CatBoostClassifier  # type: ignore
+from .catboost_kwargs import handle_fit_kwargs
+class CatBoostClassifierWrapper(CatBoostClassifier):
+    """CatBoost classifier whose ``fit`` first normalises its keyword arguments."""
+    def fit(self, *args, **kwargs):
+        """Fit the classifier after running the kwargs through ``handle_fit_kwargs``.
+
+        The positional arguments are forwarded unchanged; only the keyword
+        arguments are replaced by whatever ``handle_fit_kwargs`` returns.
+        """
+        return super().fit(*args, **handle_fit_kwargs(*args, **kwargs))

wavetrainer-0.0.6/wavetrainer/model/catboost_kwargs.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""A list of constant catboost kwargs."""
+from typing import Any
+import numpy as np
+from catboost import Pool  # type: ignore
+ORIGINAL_X = "original_x"
+EVAL_SET = "eval_set"
+def handle_fit_kwargs(*args, **kwargs) -> dict[str, Any]:
+    """Translate wavetrainer's extra fit kwargs into ones catboost accepts.
+
+    If ``ORIGINAL_X`` is absent the kwargs are returned untouched. Otherwise
+    the columns of the original dataframe that are numerically equal to the
+    columns of the positional training matrix are located, the evaluation
+    frame is narrowed to those columns, and ``EVAL_SET`` is replaced with a
+    catboost ``Pool``. ``ORIGINAL_X`` is always removed from the result.
+
+    Args:
+        *args: Positional fit arguments; ``args[0]`` must be the 2-D training
+            matrix (it is indexed as ``fit_x[:, i]``).
+        **kwargs: Keyword fit arguments, optionally carrying ``ORIGINAL_X``
+            (a dataframe) and ``EVAL_SET`` (an ``(eval_x, eval_y)`` pair).
+
+    Returns:
+        The keyword arguments to forward to the underlying catboost ``fit``.
+    """
+    if ORIGINAL_X not in kwargs:
+        return kwargs
+    df = kwargs.pop(ORIGINAL_X)
+    eval_set = kwargs.get(EVAL_SET)
+    eval_x, eval_y = (None, None) if eval_set is None else eval_set
+    if eval_x is None:
+        # No evaluation frame was supplied, so there is nothing to align;
+        # drop the placeholder rather than failing on ``None[...]`` below.
+        kwargs.pop(EVAL_SET, None)
+        return kwargs
+    fit_x = args[0]
+    # The training matrix carries no column names, so recover them by value:
+    # each matrix column is matched to the first not-yet-claimed dataframe
+    # column holding the same numbers (NaNs compare equal).
+    remaining_columns = list(df.columns)
+    included_columns = []
+    for i in range(fit_x.shape[1]):
+        arr_col_values = fit_x[:, i]
+        for col in remaining_columns:
+            if np.allclose(df[col].values, arr_col_values, equal_nan=True):
+                included_columns.append(col)
+                # Safe while iterating because the loop exits immediately.
+                remaining_columns.remove(col)
+                break
+    # Ensure the evaluation data has exactly the features being trained on.
+    kwargs[EVAL_SET] = Pool(eval_x[included_columns], label=eval_y)
+    return kwargs

wavetrainer-0.0.6/wavetrainer/model/catboost_model.py ADDED Viewed

@@ -0,0 +1,209 @@
+"""A model that wraps catboost."""
+import json
+import os
+from typing import Any, Self
+import optuna
+import pandas as pd
+from catboost import CatBoost, Pool  # type: ignore
+from ..model_type import ModelType, determine_model_type
+from .catboost_classifier_wrap import CatBoostClassifierWrapper
+from .catboost_kwargs import EVAL_SET, ORIGINAL_X
+from .catboost_regressor_wrap import CatBoostRegressorWrapper
+from .model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
+_MODEL_FILENAME = "model.cbm"
+_MODEL_PARAMS_FILENAME = "model_params.json"
+_ITERATIONS_KEY = "iterations"
+_LEARNING_RATE_KEY = "learning_rate"
+_DEPTH_KEY = "depth"
+_L2_LEAF_REG_KEY = "l2_leaf_reg"
+_BOOSTING_TYPE_KEY = "boosting_type"
+_MODEL_TYPE_KEY = "model_type"
+class CatboostModel(Model):
+    """A class that uses Catboost as a model.
+
+    The concrete estimator is built lazily by ``_provide_catboost`` once the
+    model type is known (it is set in ``pre_fit``, ``fit`` and ``load``) and
+    is then cached on the instance.
+    """
+    # pylint: disable=too-many-positional-arguments,too-many-arguments
+    _catboost: CatBoost | None
+    _iterations: None | int
+    _learning_rate: None | float
+    _depth: None | int
+    _l2_leaf_reg: None | float
+    _boosting_type: None | str
+    _model_type: None | ModelType
+    @classmethod
+    def name(cls) -> str:
+        """Return the name of this model, ``"catboost"``."""
+        return "catboost"
+    def __init__(self) -> None:
+        """Create an unconfigured model; every option starts as ``None``."""
+        super().__init__()
+        self._catboost = None
+        self._iterations = None
+        self._learning_rate = None
+        self._depth = None
+        self._l2_leaf_reg = None
+        self._boosting_type = None
+        self._model_type = None
+    @property
+    def estimator(self) -> Any:
+        """The catboost estimator backing the model (created on first use)."""
+        return self._provide_catboost()
+    def pre_fit(
+        self,
+        df: pd.DataFrame,
+        y: pd.Series | pd.DataFrame | None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
+    ) -> dict[str, Any]:
+        """Record the model type for ``y`` and build extra fit keyword arguments.
+
+        Returns:
+            A dict holding the evaluation pair under ``EVAL_SET``, the names of
+            the category-dtype columns under ``"cat_features"`` and the
+            training frame itself under ``ORIGINAL_X``.
+
+        Raises:
+            ValueError: If ``y`` is ``None``.
+        """
+        if y is None:
+            raise ValueError("y is null.")
+        self._model_type = determine_model_type(y)
+        return {
+            EVAL_SET: (eval_x, eval_y),
+            "cat_features": df.select_dtypes(include="category").columns.tolist(),
+            ORIGINAL_X: df,
+        }
+    def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
+        """Draw the catboost hyperparameters from the optuna trial."""
+        self._iterations = trial.suggest_int(_ITERATIONS_KEY, 100, 10000)
+        self._learning_rate = trial.suggest_float(_LEARNING_RATE_KEY, 0.001, 0.3)
+        self._depth = trial.suggest_int(_DEPTH_KEY, 1, 12)
+        self._l2_leaf_reg = trial.suggest_float(_L2_LEAF_REG_KEY, 3.0, 50.0)
+        self._boosting_type = trial.suggest_categorical(
+            _BOOSTING_TYPE_KEY, ["Ordered", "Plain"]
+        )
+    def load(self, folder: str) -> None:
+        """Restore the hyperparameters and the trained model from ``folder``."""
+        with open(
+            os.path.join(folder, _MODEL_PARAMS_FILENAME), encoding="utf8"
+        ) as handle:
+            params = json.load(handle)
+            self._iterations = params[_ITERATIONS_KEY]
+            self._learning_rate = params[_LEARNING_RATE_KEY]
+            self._depth = params[_DEPTH_KEY]
+            self._l2_leaf_reg = params[_L2_LEAF_REG_KEY]
+            self._boosting_type = params[_BOOSTING_TYPE_KEY]
+            self._model_type = ModelType(params[_MODEL_TYPE_KEY])
+        # The parameters must be in place first: they decide which estimator
+        # class is instantiated before the weights are loaded into it.
+        catboost = self._provide_catboost()
+        catboost.load_model(os.path.join(folder, _MODEL_FILENAME))
+    def save(self, folder: str) -> None:
+        """Write the hyperparameters (JSON) and the trained model to ``folder``."""
+        with open(
+            os.path.join(folder, _MODEL_PARAMS_FILENAME), "w", encoding="utf8"
+        ) as handle:
+            json.dump(
+                {
+                    _ITERATIONS_KEY: self._iterations,
+                    _LEARNING_RATE_KEY: self._learning_rate,
+                    _DEPTH_KEY: self._depth,
+                    _L2_LEAF_REG_KEY: self._l2_leaf_reg,
+                    _BOOSTING_TYPE_KEY: self._boosting_type,
+                    _MODEL_TYPE_KEY: str(self._model_type),
+                },
+                handle,
+            )
+        catboost = self._provide_catboost()
+        catboost.save_model(os.path.join(folder, _MODEL_FILENAME))
+    def fit(
+        self,
+        df: pd.DataFrame,
+        y: pd.Series | pd.DataFrame | None = None,
+        w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
+    ) -> Self:
+        """Train the estimator on ``df``/``y``, optionally weighted by ``w``.
+
+        ``eval_x``/``eval_y`` are passed to catboost as the evaluation set when
+        ``eval_x`` is given; otherwise no evaluation set is used.
+
+        Raises:
+            ValueError: If ``y`` is ``None``.
+        """
+        if y is None:
+            raise ValueError("y is null.")
+        self._model_type = determine_model_type(y)
+        catboost = self._provide_catboost()
+        train_pool = Pool(
+            df,
+            label=y,
+            weight=w,
+        )
+        # Only build an evaluation pool when evaluation data exists; both
+        # eval arguments default to None and an empty pool is not useful.
+        eval_pool = None if eval_x is None else Pool(eval_x, label=eval_y)
+        catboost.fit(
+            train_pool,
+            early_stopping_rounds=100,
+            verbose=False,
+            metric_period=100,
+            eval_set=eval_pool,
+        )
+        return self
+    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Predict for ``df`` and return a frame sharing its index.
+
+        The frame always has ``PREDICTION_COLUMN``; for non-regression model
+        types one ``PROBABILITY_COLUMN_PREFIX``-prefixed column is added per
+        column of ``predict_proba``'s output.
+        """
+        pred_pool = Pool(df)
+        catboost = self._provide_catboost()
+        pred = catboost.predict(pred_pool)
+        df = pd.DataFrame(
+            index=df.index,
+            data={
+                PREDICTION_COLUMN: pred.flatten(),
+            },
+        )
+        if self._model_type != ModelType.REGRESSION:
+            proba = catboost.predict_proba(pred_pool)  # type: ignore
+            for i in range(proba.shape[1]):
+                df[f"{PROBABILITY_COLUMN_PREFIX}{i}"] = proba[:, i]
+        return df
+    def _provide_catboost(self) -> CatBoost:
+        """Return the cached estimator, creating it from the model type if needed.
+
+        Raises:
+            ValueError: If no estimator exists yet and the model type is unset
+                or not one of the known ``ModelType`` members.
+        """
+        catboost = self._catboost
+        if catboost is not None:
+            return catboost
+        # Regression gets the regressor wrapper; every classification flavour
+        # shares the classifier wrapper with identical constructor arguments.
+        if self._model_type == ModelType.REGRESSION:
+            wrapper_cls = CatBoostRegressorWrapper
+        elif self._model_type in (
+            ModelType.BINARY,
+            ModelType.BINNED_BINARY,
+            ModelType.MULTI_CLASSIFICATION,
+        ):
+            wrapper_cls = CatBoostClassifierWrapper
+        else:
+            raise ValueError("catboost is null")
+        catboost = wrapper_cls(
+            iterations=self._iterations,
+            learning_rate=self._learning_rate,
+            depth=self._depth,
+            l2_leaf_reg=self._l2_leaf_reg,
+            boosting_type=self._boosting_type,
+            early_stopping_rounds=100,
+            metric_period=100,
+        )
+        self._catboost = catboost
+        return catboost

wavetrainer-0.0.6/wavetrainer/model/catboost_regressor_wrap.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""A wrapper for catboost regressor to handle some edge cases."""
+from catboost import CatBoostRegressor  # type: ignore
+from .catboost_kwargs import handle_fit_kwargs
+class CatBoostRegressorWrapper(CatBoostRegressor):
+    """CatBoost regressor whose ``fit`` first normalises its keyword arguments."""
+    def fit(self, *args, **kwargs):
+        """Fit the regressor after running the kwargs through ``handle_fit_kwargs``.
+
+        The positional arguments are forwarded unchanged; only the keyword
+        arguments are replaced by whatever ``handle_fit_kwargs`` returns.
+        """
+        return super().fit(*args, **handle_fit_kwargs(*args, **kwargs))

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/model/model.py RENAMED Viewed

@@ -2,6 +2,8 @@
 from typing import Any
+import pandas as pd
 from ..fit import Fit
 from ..params import Params
@@ -21,3 +23,13 @@ class Model(Params, Fit):
     def estimator(self) -> Any:
         """The estimator backing the model."""
         raise NotImplementedError("estimator not implemented in parent class.")
+    def pre_fit(
+        self,
+        df: pd.DataFrame,
+        y: pd.Series | pd.DataFrame | None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
+    ) -> dict[str, Any]:
+        """A call to make sure the model is prepared for the target type.
+
+        Subclasses must override this; the base implementation always raises.
+
+        Args:
+            df: The training features.
+            y: The training targets.
+            eval_x: Optional evaluation features.
+            eval_y: Optional evaluation targets.
+
+        Returns:
+            A dictionary of extra keyword arguments for the subsequent fit
+            call (the selector forwards it to its estimator's ``fit``).
+
+        Raises:
+            NotImplementedError: Always, in this parent class.
+        """
+        raise NotImplementedError("pre_fit not implemented in parent class.")

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/model/model_router.py RENAMED Viewed

@@ -20,6 +20,8 @@ _MODELS = {
 class ModelRouter(Model):
     """A router that routes to a different weights class."""
+    # pylint: disable=too-many-positional-arguments,too-many-arguments
     _model: Model | None
     def __init__(self) -> None:
@@ -37,10 +39,23 @@ class ModelRouter(Model):
             raise ValueError("model is null")
         return model.estimator
+    def pre_fit(
+        self,
+        df: pd.DataFrame,
+        y: pd.Series | pd.DataFrame | None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
+    ) -> dict[str, Any]:
+        """Forward ``pre_fit`` to the routed model and return its result.
+
+        Raises:
+            ValueError: If no model has been chosen yet.
+        """
+        if (routed := self._model) is None:
+            raise ValueError("model is null")
+        return routed.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y)
     def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
         self._model = _MODELS[
             trial.suggest_categorical("model", list(_MODELS.keys()))
         ]()
+        self._model.set_options(trial)
     def load(self, folder: str) -> None:
         with open(os.path.join(folder, _MODEL_ROUTER_FILE), encoding="utf8") as handle:
@@ -69,11 +84,13 @@ class ModelRouter(Model):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         model = self._model
         if model is None:
             raise ValueError("model is null")
-        model.fit(df, y=y, w=w)
+        model.fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y)
         return self
     def transform(self, df: pd.DataFrame) -> pd.DataFrame:

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/model_type.py RENAMED Viewed

@@ -1,17 +1,17 @@
 """An enum to define the model type."""
-from enum import Enum
+from enum import StrEnum, auto
 import pandas as pd
-class ModelType(Enum):
+class ModelType(StrEnum):
     """The type of model being run."""
-    BINARY = 1
-    REGRESSION = 2
-    BINNED_BINARY = 3
-    MULTI_CLASSIFICATION = 4
+    BINARY = auto()
+    REGRESSION = auto()
+    BINNED_BINARY = auto()
+    MULTI_CLASSIFICATION = auto()
 def determine_model_type(y: pd.Series | pd.DataFrame) -> ModelType:

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/reducer/base_selector_reducer.py RENAMED Viewed

@@ -15,6 +15,8 @@ from .reducer import Reducer
 class BaseSelectorReducer(Reducer):
     """A class that uses the base selector from the feature engine."""
+    # pylint: disable=too-many-positional-arguments,too-many-arguments
     def __init__(self, base_selector: BaseSelector, file_name: str) -> None:
         super().__init__()
         self._base_selector = base_selector
@@ -40,6 +42,8 @@ class BaseSelectorReducer(Reducer):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         try:
             self._base_selector.fit(df)  # type: ignore

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/reducer/combined_reducer.py RENAMED Viewed

@@ -20,6 +20,8 @@ _REDUCERS_KEY = "reducers"
 class CombinedReducer(Reducer):
     """A reducer that combines a series of reducers."""
+    # pylint: disable=too-many-positional-arguments,too-many-arguments
     def __init__(self):
         super().__init__()
         self._reducers = [
@@ -73,6 +75,8 @@ class CombinedReducer(Reducer):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         for reducer in self._reducers:
             df = reducer.fit_transform(df)

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/reducer/nonnumeric_reducer.py RENAMED Viewed

@@ -11,6 +11,8 @@ from .reducer import Reducer
 class NonNumericReducer(Reducer):
     """A class that removes non numeric columns from a dataframe."""
+    # pylint: disable=too-many-positional-arguments,too-many-arguments
     @classmethod
     def name(cls) -> str:
         return "nonnumeric"
@@ -29,6 +31,8 @@ class NonNumericReducer(Reducer):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         return self

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/selector/selector.py RENAMED Viewed

@@ -7,6 +7,7 @@ from typing import Self
 import joblib  # type: ignore
 import optuna
 import pandas as pd
+import sklearn  # type: ignore
 from sklearn.feature_selection import RFE  # type: ignore
 from ..fit import Fit
@@ -19,24 +20,20 @@ _SELECTOR_FILE = "selector.joblib"
 class Selector(Params, Fit):
     """The selector class."""
-    def __init__(self, model: Model, total_features: int):
+    # pylint: disable=too-many-positional-arguments,too-many-arguments
+    _selector: RFE | None
+    def __init__(self, model: Model):
         super().__init__()
         self._model = model
         self._feature_ratio = 0.0
         self._steps = 0
-        n_features_to_select = max(1, total_features * self._feature_ratio)
-        self._selector = RFE(
-            model.estimator,
-            n_features_to_select=n_features_to_select,
-            step=self._steps,
-            verbose=1,
-        )
+        self._selector = None
     def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
         self._feature_ratio = trial.suggest_float("feature_ratio", 0.0, 1.0)
-        steps = trial.suggest_int("steps", 1, 16)
-        self._steps = steps
-        self._selector.step = steps
+        self._steps = trial.suggest_int("steps", 1, 16)
     def load(self, folder: str) -> None:
         self._selector = joblib.load(os.path.join(folder, _SELECTOR_FILE))
@@ -49,20 +46,35 @@ class Selector(Params, Fit):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
+        sklearn.set_config(enable_metadata_routing=False)
+        model_kwargs = self._model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y)
         if not isinstance(y, pd.Series):
             raise ValueError("y is not a series.")
+        n_features_to_select = max(1, int(len(df.columns) * self._feature_ratio))
+        self._selector = RFE(
+            self._model.estimator,
+            n_features_to_select=n_features_to_select,
+            step=max(
+                1,
+                int((len(df.columns) - n_features_to_select) / self._steps),
+            ),
+        )
         try:
-            self._selector.fit(df, y=y, sample_weight=w)
+            self._selector.fit(df, y=y, sample_weight=w, **model_kwargs)
         except ValueError as exc:
             # Catch issues with 1 feature as a reduction target.
             logging.warning(str(exc))
         return self
     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
+        selector = self._selector
+        if selector is None:
+            raise ValueError("selector is null.")
         try:
-            return df[self._selector.get_feature_names_out()]
+            return df[selector.get_feature_names_out()]
         except AttributeError as exc:
             # Catch issues with 1 feature as a reduction target.
             logging.warning(str(exc))

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/trainer.py RENAMED Viewed

@@ -11,7 +11,7 @@ from typing import Self
 import optuna
 import pandas as pd
 import tqdm
-from sklearn.metrics import accuracy_score, f1_score  # type: ignore
+from sklearn.metrics import f1_score, r2_score  # type: ignore
 from .calibrator.calibrator_router import CalibratorRouter
 from .exceptions import WavetrainException
@@ -158,6 +158,8 @@ class Trainer(Fit):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         """Perform a train on the data to fit to the targets."""
         if y is None:
@@ -215,12 +217,14 @@ class Trainer(Fit):
                     model.set_options(trial)
                     # Train
-                    selector = Selector(model, len(x_train.columns.values))
+                    selector = Selector(model)
                     selector.set_options(trial)
-                    selector.fit(x_train, y=y_train, w=w)
+                    selector.fit(x_train, y=y_train, w=w, eval_x=x_test, eval_y=y_test)
                     x_train = selector.transform(x_train)
                     x_test = selector.transform(x_test)
-                    x_pred = model.fit_transform(x_train, y=y_train)
+                    x_pred = model.fit_transform(
+                        x_train, y=y_train, w=w, eval_x=x_test, eval_y=y_test
+                    )
                     # Calibrate
                     calibrator = CalibratorRouter(model)
@@ -243,8 +247,8 @@ class Trainer(Fit):
                     y_pred = model.transform(x_test)
                     y_pred = calibrator.transform(y_pred)
                     if determine_model_type(y_series) == ModelType.REGRESSION:
-                        return accuracy_score(y_test, y_pred[[PREDICTION_COLUMN]])
-                    return f1_score(y_test, y_pred[[PREDICTION_COLUMN]])
+                        return float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
+                    return float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
                 except WavetrainException as exc:
                     logging.warning(str(exc))
                     return -1.0
@@ -286,9 +290,15 @@ class Trainer(Fit):
             train_len = len(df[dt_index < start_test_index])
             test_len = len(df.loc[start_test_index:start_validation_index])
+            last_processed_dt = None
             for count, test_idx in tqdm.tqdm(
-                enumerate(df[dt_index >= start_test_index].index)
+                enumerate(test_dt_index[test_dt_index >= start_test_index])
             ):
+                if (
+                    last_processed_dt is not None
+                    and test_idx < last_processed_dt + self._walkforward_timedelta
+                ):
+                    continue
                 test_dt = test_idx.to_pydatetime()
                 found = False
                 for trial in study.trials:
@@ -373,7 +383,7 @@ class Trainer(Fit):
                 model = ModelRouter()
                 model.load(folder)
-                selector = Selector(model, len(df.columns.values))
+                selector = Selector(model)
                 selector.load(folder)
                 calibrator = CalibratorRouter(model)

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/class_weights.py RENAMED Viewed

@@ -14,6 +14,8 @@ from .weights import WEIGHTS_COLUMN, Weights
 class ClassWeights(Weights):
     """Class weight class."""
+    # pylint: disable=too-many-positional-arguments,too-many-arguments
     _class_weights: dict[Any, float]
     def __init__(self) -> None:
@@ -39,6 +41,8 @@ class ClassWeights(Weights):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         if not isinstance(y, pd.Series):
             raise ValueError("y is not a series.")

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/combined_weights.py RENAMED Viewed

@@ -13,6 +13,8 @@ from .weights_router import WeightsRouter
 class CombinedWeights(Weights):
     """A weights class that combines multiple weights."""
+    # pylint: disable=too-many-positional-arguments,too-many-arguments
     def __init__(self) -> None:
         super().__init__()
         self._weights = [WeightsRouter(), ClassWeights()]
@@ -38,6 +40,8 @@ class CombinedWeights(Weights):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         for weights in self._weights:
             weights.fit(df, y=y)

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/exponential_weights.py RENAMED Viewed

@@ -12,6 +12,8 @@ from .weights import WEIGHTS_COLUMN, Weights
 class ExponentialWeights(Weights):
     """Exponential weight class."""
+    # pylint: disable=too-many-positional-arguments,too-many-arguments
     @classmethod
     def name(cls) -> str:
         """The name of the weight class."""
@@ -31,6 +33,8 @@ class ExponentialWeights(Weights):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         return self

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/linear_weights.py RENAMED Viewed

@@ -12,7 +12,7 @@ from .weights import WEIGHTS_COLUMN, Weights
 class LinearWeights(Weights):
     """Linear weight class."""
-    # pylint: disable=duplicate-code
+    # pylint: disable=duplicate-code,too-many-positional-arguments,too-many-arguments
     @classmethod
     def name(cls) -> str:
@@ -33,6 +33,8 @@ class LinearWeights(Weights):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         return self

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/noop_weights.py RENAMED Viewed

@@ -12,7 +12,7 @@ from .weights import WEIGHTS_COLUMN, Weights
 class NoopWeights(Weights):
     """Noop weight class."""
-    # pylint: disable=duplicate-code
+    # pylint: disable=duplicate-code,too-many-positional-arguments,too-many-arguments
     @classmethod
     def name(cls) -> str:
@@ -33,6 +33,8 @@ class NoopWeights(Weights):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         return self

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/sigmoid_weights.py RENAMED Viewed

@@ -13,7 +13,7 @@ from .weights import WEIGHTS_COLUMN, Weights
 class SigmoidWeights(Weights):
     """Sigmoid weight class."""
-    # pylint: disable=duplicate-code
+    # pylint: disable=duplicate-code,too-many-positional-arguments,too-many-arguments
     @classmethod
     def name(cls) -> str:
@@ -34,6 +34,8 @@ class SigmoidWeights(Weights):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         return self

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/weights/weights_router.py RENAMED Viewed

@@ -26,6 +26,8 @@ _WEIGHTS = {
 class WeightsRouter(Weights):
     """A router that routes to a different weights class."""
+    # pylint: disable=too-many-positional-arguments,too-many-arguments
     _weights: Weights | None
     def __init__(self) -> None:
@@ -71,6 +73,8 @@ class WeightsRouter(Weights):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         return self

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer/windower/windower.py RENAMED Viewed

@@ -18,6 +18,8 @@ _LOOKBACK_KEY = "lookback"
 class Windower(Params, Fit):
     """The windower class."""
+    # pylint: disable=too-many-positional-arguments,too-many-arguments
     _lookback_ratio: float | None
     def __init__(self, dt_column: str | None):
@@ -48,6 +50,8 @@ class Windower(Params, Fit):
         df: pd.DataFrame,
         y: pd.Series | pd.DataFrame | None = None,
         w: pd.Series | None = None,
+        eval_x: pd.DataFrame | None = None,
+        eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         lookback_ratio = self._lookback_ratio
         if lookback_ratio is None:

{wavetrainer-0.0.4 → wavetrainer-0.0.6/wavetrainer.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.0.4
+Version: 0.0.6
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield
@@ -21,7 +21,6 @@ Requires-Dist: scipy>=1.15.2
 Requires-Dist: catboost>=1.2.7
 Requires-Dist: venn-abers>=1.4.6
 Requires-Dist: mapie>=0.9.2
-Requires-Dist: shapiq>=1.2.2
 # wavetrainer
@@ -49,7 +48,6 @@ Python 3.11.6:
 - [catboost](https://catboost.ai/)
 - [venn-abers](https://github.com/ip200/venn-abers)
 - [mapie](https://mapie.readthedocs.io/en/stable/)
-- [shapiq](https://github.com/mmschlk/shapiq)
 ## Raison D'être :thought_balloon:

{wavetrainer-0.0.4 → wavetrainer-0.0.6}/wavetrainer.egg-info/SOURCES.txt RENAMED Viewed

@@ -9,7 +9,6 @@ wavetrainer/__init__.py
 wavetrainer/create.py
 wavetrainer/exceptions.py
 wavetrainer/fit.py
-wavetrainer/load.py
 wavetrainer/model_type.py
 wavetrainer/params.py
 wavetrainer/trainer.py
@@ -25,7 +24,10 @@ wavetrainer/calibrator/calibrator_router.py
 wavetrainer/calibrator/mapie_calibrator.py
 wavetrainer/calibrator/vennabers_calibrator.py
 wavetrainer/model/__init__.py
+wavetrainer/model/catboost_classifier_wrap.py
+wavetrainer/model/catboost_kwargs.py
 wavetrainer/model/catboost_model.py
+wavetrainer/model/catboost_regressor_wrap.py
 wavetrainer/model/model.py
 wavetrainer/model/model_router.py
 wavetrainer/reducer/__init__.py

wavetrainer-0.0.4/requirements.txt → wavetrainer-0.0.6/wavetrainer.egg-info/requires.txt RENAMED Viewed

@@ -8,4 +8,3 @@ scipy>=1.15.2
 catboost>=1.2.7
 venn-abers>=1.4.6
 mapie>=0.9.2
-shapiq>=1.2.2

wavetrainer-0.0.4/wavetrainer/__init__.py DELETED Viewed

@@ -1,10 +0,0 @@
-"""The wavetrain main module."""
-from .create import create
-from .load import load
-__VERSION__ = "0.0.4"
-__all__ = (
-    "create",
-    "load",
-)

wavetrainer-0.0.4/wavetrainer/load.py DELETED Viewed

@@ -1,8 +0,0 @@
-"""The function for loading the trainer state from disk."""
-from .trainer import Trainer
-def load(folder: str) -> Trainer:
-    """Loads the trainer from the folder."""
-    raise NotImplementedError("load isn't implemented.")

wavetrainer-0.0.4/wavetrainer/model/catboost_model.py DELETED Viewed

@@ -1,80 +0,0 @@
-"""A model that wraps catboost."""
-import os
-from typing import Any, Self
-import optuna
-import pandas as pd
-from catboost import CatBoostClassifier, Pool  # type: ignore
-from .model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
-_MODEL_FILENAME = "model.cbm"
-class CatboostModel(Model):
-    """A class that uses Catboost as a model."""
-    @classmethod
-    def name(cls) -> str:
-        return "catboost"
-    def __init__(self) -> None:
-        super().__init__()
-        self._catboost = CatBoostClassifier()
-    @property
-    def estimator(self) -> Any:
-        return self._catboost
-    def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
-        iterations = trial.suggest_int("iterations", 100, 10000)
-        learning_rate = trial.suggest_float("learning_rate", 0.001, 0.3)
-        depth = trial.suggest_int("depth", 1, 12)
-        l2_leaf_reg = trial.suggest_float("l2_leaf_reg", 3.0, 50.0)
-        boosting_type = trial.suggest_categorical("boosting_type", ["Ordered", "Plain"])
-        self._catboost.set_params(
-            iterations=iterations,
-            learning_rate=learning_rate,
-            depth=depth,
-            l2_leaf_reg=l2_leaf_reg,
-            boosting_type=boosting_type,
-            early_stopping_rounds=100,
-        )
-    def load(self, folder: str) -> None:
-        self._catboost.load_model(os.path.join(folder, _MODEL_FILENAME))
-    def save(self, folder: str) -> None:
-        self._catboost.save_model(os.path.join(folder, _MODEL_FILENAME))
-    def fit(
-        self,
-        df: pd.DataFrame,
-        y: pd.Series | pd.DataFrame | None = None,
-        w: pd.Series | None = None,
-    ) -> Self:
-        train_pool = Pool(
-            df,
-            label=y,
-            weight=w,
-        )
-        self._catboost.fit(
-            train_pool,
-            early_stopping_rounds=100,
-        )
-        return self
-    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
-        pred_pool = Pool(df)
-        pred = self._catboost.predict(pred_pool)
-        proba = self._catboost.predict_proba(pred_pool)
-        df = pd.DataFrame(
-            index=df.index,
-            data={
-                PREDICTION_COLUMN: pred.flatten(),
-            },
-        )
-        for i in range(proba.shape[1]):
-            df[f"{PROBABILITY_COLUMN_PREFIX}{i}"] = proba[:, i]
-        return df