PyPI - wavetrainer - Versions diffs - 0.0.6__tar.gz → 0.0.8__tar.gz - Mend

wavetrainer 0.0.6.tar.gz → 0.0.8.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

{wavetrainer-0.0.6/wavetrainer.egg-info → wavetrainer-0.0.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.0.6
+Version: 0.0.8
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield

{wavetrainer-0.0.6 → wavetrainer-0.0.8}/setup.py RENAMED Viewed

@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
 setup(
     name='wavetrainer',
-    version='0.0.6',
+    version='0.0.8',
     description='A library for automatically finding the optimal model within feature and hyperparameter space.',
     long_description=long_description,
     long_description_content_type='text/markdown',

wavetrainer-0.0.8/tests/model/catboost_kwargs_test.py ADDED Viewed

@@ -0,0 +1,28 @@
+"""Tests for the catboost kwargs handler class."""
+import unittest
+import pandas as pd
+from wavetrainer.model.catboost_kwargs import handle_fit_kwargs
+class TestCatboostKwargs(unittest.TestCase):
+    def test_handle_fit_kwargs(self):
+        x_train = pd.DataFrame(data={
+            "thing": [0.0, 1.0, 2.0, 3.0, 4.0],
+        })
+        x_train["thing"] = x_train["thing"].astype('category')
+        y_train = pd.Series(data=[1.0, 2.0, 3.0, 4.0])
+        x_test = pd.DataFrame(data={
+            "thing": [0.0, 1.0, 2.0, 3.0, 4.0],
+        })
+        x_test["thing"] = x_test["thing"].astype('category')
+        y_test = pd.Series(data=[1.0, 2.0, 3.0, 4.0])
+        args, _ = handle_fit_kwargs(
+            x_train,
+            y_train,
+            eval_set=(x_test, y_test),
+            cat_features=x_train.select_dtypes(include="category").columns.tolist(),
+        )
+        assert len(args) == 2

{wavetrainer-0.0.6 → wavetrainer-0.0.8}/tests/trainer_test.py RENAMED Viewed

@@ -20,10 +20,11 @@ class TestTrainer(unittest.TestCase):
                 data={
                     "column1": x_data,
                     "column2": [(x * random.random()) + random.random() for x in x_data],
-                    "column3": [(x / random.random()) - random.random() for x in x_data],
+                    "column3": [int(((x / random.random()) - random.random()) * 1000.0) for x in x_data],
                 },
                 index=x_index,
             )
+            df["column3"] = df["column3"].astype('category')
             y = pd.DataFrame(
                 data={
                     "y": [x % 2 == 0 for x in x_data],

{wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/__init__.py RENAMED Viewed

@@ -2,5 +2,5 @@
 from .create import create
-__VERSION__ = "0.0.6"
+__VERSION__ = "0.0.8"
 __all__ = ("create",)

{wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/calibrator/mapie_calibrator.py RENAMED Viewed

@@ -60,7 +60,7 @@ class MAPIECalibrator(Calibrator):
         try:
             alpha = []
             for potential_alpha in [0.05, 0.32]:
-                if len(df) > int(1.0 / potential_alpha):
+                if len(df) > int(1.0 / potential_alpha) + 1:
                     alpha.append(potential_alpha)
             if alpha:
                 _, y_pis = self._mapie.predict(df, alpha=alpha)

{wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model/catboost_classifier_wrap.py RENAMED Viewed

@@ -11,5 +11,5 @@ class CatBoostClassifierWrapper(CatBoostClassifier):
     """A wrapper for the catboost classifier."""
     def fit(self, *args, **kwargs):
-        kwargs = handle_fit_kwargs(*args, **kwargs)
+        args, kwargs = handle_fit_kwargs(*args, **kwargs)
         return super().fit(*args, **kwargs)

wavetrainer-0.0.8/wavetrainer/model/catboost_kwargs.py ADDED Viewed

@@ -0,0 +1,50 @@
+"""A list of constant catboost kwargs."""
+from typing import Any
+import numpy as np
+from catboost import Pool  # type: ignore
+ORIGINAL_X_ARG_KEY = "original_x"
+EVAL_SET_ARG_KEY = "eval_set"
+CAT_FEATURES_ARG_KEY = "cat_features"
+def handle_fit_kwargs(*args, **kwargs) -> tuple[tuple[Any, ...], dict[str, Any]]:
+    """Handles keyword args coming into a catboost fit method."""
+    if ORIGINAL_X_ARG_KEY in kwargs:
+        df = kwargs[ORIGINAL_X_ARG_KEY]
+        eval_x, eval_y = kwargs[EVAL_SET_ARG_KEY]
+        cat_features = kwargs[CAT_FEATURES_ARG_KEY]
+        args_list = list(args)
+        fit_x = args_list[0]
+        fix_x_cp = fit_x.copy()
+        # Stupid code to ensure eval is feature equivalent to train data
+        included_columns = []
+        for i in range(fix_x_cp.shape[1]):
+            arr_col_values = fix_x_cp[:, i]
+            for col in df.columns:
+                if col in included_columns:
+                    continue
+                df_col_values = df[col].values
+                if np.allclose(df_col_values, arr_col_values, equal_nan=True):
+                    included_columns.append(col)
+                    break
+        # We also need to update cat_features or catboost will yell at us
+        cat_features = list(
+            set(list(kwargs.get(CAT_FEATURES_ARG_KEY, []))) & set(included_columns)
+        )
+        args_list[0] = df[included_columns]
+        args = tuple(args_list)
+        eval_x = eval_x[included_columns]
+        kwargs[EVAL_SET_ARG_KEY] = Pool(
+            eval_x,
+            label=eval_y,
+            cat_features=cat_features,
+        )
+        kwargs[CAT_FEATURES_ARG_KEY] = cat_features
+        del kwargs[ORIGINAL_X_ARG_KEY]
+    return args, kwargs

{wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model/catboost_model.py RENAMED Viewed

@@ -10,7 +10,8 @@ from catboost import CatBoost, Pool  # type: ignore
 from ..model_type import ModelType, determine_model_type
 from .catboost_classifier_wrap import CatBoostClassifierWrapper
-from .catboost_kwargs import EVAL_SET, ORIGINAL_X
+from .catboost_kwargs import (CAT_FEATURES_ARG_KEY, EVAL_SET_ARG_KEY,
+                              ORIGINAL_X_ARG_KEY)
 from .catboost_regressor_wrap import CatBoostRegressorWrapper
 from .model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
@@ -66,9 +67,9 @@ class CatboostModel(Model):
             raise ValueError("y is null.")
         self._model_type = determine_model_type(y)
         return {
-            EVAL_SET: (eval_x, eval_y),
-            "cat_features": df.select_dtypes(include="category").columns.tolist(),
-            ORIGINAL_X: df,
+            EVAL_SET_ARG_KEY: (eval_x, eval_y),
+            CAT_FEATURES_ARG_KEY: df.select_dtypes(include="category").columns.tolist(),
+            ORIGINAL_X_ARG_KEY: df,
         }
     def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
@@ -122,6 +123,8 @@ class CatboostModel(Model):
     ) -> Self:
         if y is None:
             raise ValueError("y is null.")
+        if eval_x is None:
+            raise ValueError("eval_x is null.")
         self._model_type = determine_model_type(y)
         catboost = self._provide_catboost()
@@ -129,10 +132,12 @@ class CatboostModel(Model):
             df,
             label=y,
             weight=w,
+            cat_features=df.select_dtypes(include="category").columns.tolist(),
         )
         eval_pool = Pool(
             eval_x,
             label=eval_y,
+            cat_features=eval_x.select_dtypes(include="category").columns.tolist(),
         )
         catboost.fit(
             train_pool,

{wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model/catboost_regressor_wrap.py RENAMED Viewed

@@ -9,5 +9,5 @@ class CatBoostRegressorWrapper(CatBoostRegressor):
     """A wrapper for the catboost regressor."""
     def fit(self, *args, **kwargs):
-        kwargs = handle_fit_kwargs(*args, **kwargs)
+        args, kwargs = handle_fit_kwargs(*args, **kwargs)
         return super().fit(*args, **kwargs)

wavetrainer-0.0.8/wavetrainer/weights/__init__.py ADDED Viewed

File without changes

{wavetrainer-0.0.6 → wavetrainer-0.0.8/wavetrainer.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.0.6
+Version: 0.0.8
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield

{wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer.egg-info/SOURCES.txt RENAMED Viewed

@@ -5,6 +5,8 @@ requirements.txt
 setup.py
 tests/__init__.py
 tests/trainer_test.py
+tests/model/__init__.py
+tests/model/catboost_kwargs_test.py
 wavetrainer/__init__.py
 wavetrainer/create.py
 wavetrainer/exceptions.py

wavetrainer-0.0.6/wavetrainer/model/catboost_kwargs.py DELETED Viewed

@@ -1,35 +0,0 @@
-"""A list of constant catboost kwargs."""
-from typing import Any
-import numpy as np
-from catboost import Pool  # type: ignore
-ORIGINAL_X = "original_x"
-EVAL_SET = "eval_set"
-def handle_fit_kwargs(*args, **kwargs) -> dict[str, Any]:
-    """Handles keyword args coming into a catboost fit method."""
-    if ORIGINAL_X in kwargs:
-        df = kwargs[ORIGINAL_X]
-        eval_x, eval_y = kwargs[EVAL_SET]
-        fit_x = args[0]
-        fix_x_cp = fit_x.copy()
-        # Stupid code to ensure eval is feature equivalent to train data
-        included_columns = []
-        for i in range(fix_x_cp.shape[1]):
-            arr_col_values = fix_x_cp[:, i]
-            for col in df.columns:
-                df_col_values = df[col].values
-                if np.allclose(df_col_values, arr_col_values, equal_nan=True):
-                    included_columns.append(col)
-                    df = df.drop(col, axis=1)
-                    break
-        eval_x = eval_x[included_columns]
-        kwargs[EVAL_SET] = Pool(eval_x, label=eval_y)
-        del kwargs[ORIGINAL_X]
-    return kwargs