PyPI - wavetrainer - Versions diffs - 0.0.8__tar.gz → 0.0.10__tar.gz - Mend

wavetrainer 0.0.8.tar.gz → 0.0.10.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57)

{wavetrainer-0.0.8/wavetrainer.egg-info → wavetrainer-0.0.10}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.0.8
+Version: 0.0.10
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/setup.py RENAMED Viewed

@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
 setup(
     name='wavetrainer',
-    version='0.0.8',
+    version='0.0.10',
     description='A library for automatically finding the optimal model within feature and hyperparameter space.',
     long_description=long_description,
     long_description_content_type='text/markdown',

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/tests/trainer_test.py RENAMED Viewed

@@ -37,3 +37,25 @@ class TestTrainer(unittest.TestCase):
             df = trainer.transform(df)
             print("df:")
             print(df)
+    def test_trainer_dt_column(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=1, dt_column="dt_column")
+            x_data = [i for i in range(100)]
+            x_index = [datetime.datetime(2022, 1, 1) + datetime.timedelta(days=i) for i in range(len(x_data))]
+            df = pd.DataFrame(
+                data={
+                    "column1": x_data,
+                    "dt_column": x_index,
+                },
+            )
+            y = pd.DataFrame(
+                data={
+                    "y": [x % 2 == 0 for x in x_data],
+                },
+                index=df.index,
+            )
+            trainer.fit(df, y=y)
+            df = trainer.transform(df)
+            print("df:")
+            print(df)

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/__init__.py RENAMED Viewed

@@ -2,5 +2,5 @@
 from .create import create
-__VERSION__ = "0.0.8"
+__VERSION__ = "0.0.10"
 __all__ = ("create",)

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model/catboost_model.py RENAMED Viewed

@@ -23,12 +23,13 @@ _DEPTH_KEY = "depth"
 _L2_LEAF_REG_KEY = "l2_leaf_reg"
 _BOOSTING_TYPE_KEY = "boosting_type"
 _MODEL_TYPE_KEY = "model_type"
+_EARLY_STOPPING_ROUNDS = "early_stopping_rounds"
 class CatboostModel(Model):
     """A class that uses Catboost as a model."""
-    # pylint: disable=too-many-positional-arguments,too-many-arguments
+    # pylint: disable=too-many-positional-arguments,too-many-arguments,too-many-instance-attributes
     _catboost: CatBoost | None
     _iterations: None | int
@@ -37,6 +38,7 @@ class CatboostModel(Model):
     _l2_leaf_reg: None | float
     _boosting_type: None | str
     _model_type: None | ModelType
+    _early_stopping_rounds: None | int
     @classmethod
     def name(cls) -> str:
@@ -51,6 +53,7 @@ class CatboostModel(Model):
         self._l2_leaf_reg = None
         self._boosting_type = None
         self._model_type = None
+        self._early_stopping_rounds = None
     @property
     def estimator(self) -> Any:
@@ -80,6 +83,9 @@ class CatboostModel(Model):
         self._boosting_type = trial.suggest_categorical(
             _BOOSTING_TYPE_KEY, ["Ordered", "Plain"]
         )
+        self._early_stopping_rounds = trial.suggest_int(
+            _EARLY_STOPPING_ROUNDS, 10, 1000
+        )
     def load(self, folder: str) -> None:
         with open(
@@ -92,6 +98,7 @@ class CatboostModel(Model):
             self._l2_leaf_reg = params[_L2_LEAF_REG_KEY]
             self._boosting_type = params[_BOOSTING_TYPE_KEY]
             self._model_type = ModelType(params[_MODEL_TYPE_KEY])
+            self._early_stopping_rounds = params[_EARLY_STOPPING_ROUNDS]
         catboost = self._provide_catboost()
         catboost.load_model(os.path.join(folder, _MODEL_FILENAME))
@@ -107,6 +114,7 @@ class CatboostModel(Model):
                     _L2_LEAF_REG_KEY: self._l2_leaf_reg,
                     _BOOSTING_TYPE_KEY: self._boosting_type,
                     _MODEL_TYPE_KEY: str(self._model_type),
+                    _EARLY_STOPPING_ROUNDS: self._early_stopping_rounds,
                 },
                 handle,
             )
@@ -141,7 +149,7 @@ class CatboostModel(Model):
         )
         catboost.fit(
             train_pool,
-            early_stopping_rounds=100,
+            early_stopping_rounds=self._early_stopping_rounds,
             verbose=False,
             metric_period=100,
             eval_set=eval_pool,
@@ -149,7 +157,10 @@ class CatboostModel(Model):
         return self
     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
-        pred_pool = Pool(df)
+        pred_pool = Pool(
+            df,
+            cat_features=df.select_dtypes(include="category").columns.tolist(),
+        )
         catboost = self._provide_catboost()
         pred = catboost.predict(pred_pool)
         df = pd.DataFrame(
@@ -175,7 +186,7 @@ class CatboostModel(Model):
                         depth=self._depth,
                         l2_leaf_reg=self._l2_leaf_reg,
                         boosting_type=self._boosting_type,
-                        early_stopping_rounds=100,
+                        early_stopping_rounds=self._early_stopping_rounds,
                         metric_period=100,
                     )
                 case ModelType.REGRESSION:
@@ -185,7 +196,7 @@ class CatboostModel(Model):
                         depth=self._depth,
                         l2_leaf_reg=self._l2_leaf_reg,
                         boosting_type=self._boosting_type,
-                        early_stopping_rounds=100,
+                        early_stopping_rounds=self._early_stopping_rounds,
                         metric_period=100,
                     )
                 case ModelType.BINNED_BINARY:
@@ -195,7 +206,7 @@ class CatboostModel(Model):
                         depth=self._depth,
                         l2_leaf_reg=self._l2_leaf_reg,
                         boosting_type=self._boosting_type,
-                        early_stopping_rounds=100,
+                        early_stopping_rounds=self._early_stopping_rounds,
                         metric_period=100,
                     )
                 case ModelType.MULTI_CLASSIFICATION:
@@ -205,7 +216,7 @@ class CatboostModel(Model):
                         depth=self._depth,
                         l2_leaf_reg=self._l2_leaf_reg,
                         boosting_type=self._boosting_type,
-                        early_stopping_rounds=100,
+                        early_stopping_rounds=self._early_stopping_rounds,
                         metric_period=100,
                     )
             self._catboost = catboost

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/reducer/base_selector_reducer.py RENAMED Viewed

@@ -1,5 +1,6 @@
 """A reducer that uses a base selector from the feature engine."""
+import logging
 import os
 from typing import Self
@@ -26,6 +27,11 @@ class BaseSelectorReducer(Reducer):
     def name(cls) -> str:
         raise NotImplementedError("name not implemented in parent class.")
+    @classmethod
+    def should_raise(cls) -> bool:
+        """Whether the class should raise its exception if it encounters it."""
+        return True
     def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
         pass
@@ -45,11 +51,17 @@ class BaseSelectorReducer(Reducer):
         eval_x: pd.DataFrame | None = None,
         eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
+        if len(df.columns) <= 1:
+            return self
         try:
             self._base_selector.fit(df)  # type: ignore
         except ValueError as exc:
-            raise WavetrainException() from exc
+            logging.warning(str(exc))
+            if self.should_raise():
+                raise WavetrainException() from exc
         return self
     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
+        if len(df.columns) <= 1:
+            return df
         return self._base_selector.transform(df)

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/reducer/correlation_reducer.py RENAMED Viewed

@@ -19,3 +19,7 @@ class CorrelationReducer(BaseSelectorReducer):
     @classmethod
     def name(cls) -> str:
         return "correlation"
+    @classmethod
+    def should_raise(cls) -> bool:
+        return False

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/selector/selector.py RENAMED Viewed

@@ -53,6 +53,8 @@ class Selector(Params, Fit):
         model_kwargs = self._model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y)
         if not isinstance(y, pd.Series):
             raise ValueError("y is not a series.")
+        if len(df.columns) <= 1:
+            return self
         n_features_to_select = max(1, int(len(df.columns) * self._feature_ratio))
         self._selector = RFE(
             self._model.estimator,
@@ -70,6 +72,8 @@ class Selector(Params, Fit):
         return self
     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
+        if len(df.columns) <= 1:
+            return df
         selector = self._selector
         if selector is None:
             raise ValueError("selector is null.")

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/trainer.py RENAMED Viewed

@@ -165,7 +165,11 @@ class Trainer(Fit):
         if y is None:
             return self
-        dt_index = df.index if self._dt_column is None else df[self._dt_column]
+        dt_index = (
+            df.index
+            if self._dt_column is None
+            else pd.DatetimeIndex(pd.to_datetime(df[self._dt_column]))
+        )
         def _fit_column(y_series: pd.Series):
             column_dir = os.path.join(self._folder, str(y_series.name))
@@ -184,10 +188,10 @@ class Trainer(Fit):
                 trial.set_user_attr(_IDX_USR_ATTR_KEY, split_idx.isoformat())
                 train_dt_index = dt_index[: len(x)]
-                x_train = x[train_dt_index < split_idx]
-                x_test = x[train_dt_index >= split_idx]
-                y_train = y_series[train_dt_index < split_idx]
-                y_test = y_series[train_dt_index >= split_idx]
+                x_train = x[train_dt_index < split_idx]  # type: ignore
+                x_test = x[train_dt_index >= split_idx]  # type: ignore
+                y_train = y_series[train_dt_index < split_idx]  # type: ignore
+                y_test = y_series[train_dt_index >= split_idx]  # type: ignore
                 try:
                     # Window the data
@@ -250,14 +254,15 @@ class Trainer(Fit):
                         return float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
                     return float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
                 except WavetrainException as exc:
+                    logging.warning("WE DID NOT END UP TRAINING ANYTHING!!!!!")
                     logging.warning(str(exc))
                     return -1.0
             start_validation_index = (
-                dt_index[-int(len(dt_index) * self._validation_size) - 1]
+                dt_index.to_list()[-int(len(dt_index) * self._validation_size) - 1]
                 if isinstance(self._validation_size, float)
                 else dt_index[
-                    dt_index >= (dt_index.to_list()[-1] - self._validation_size)
+                    dt_index >= (dt_index.to_list()[-1] - self._validation_size)  # type: ignore
                 ].to_list()[0]
             )
             test_df = df[dt_index < start_validation_index]
@@ -288,7 +293,12 @@ class Trainer(Fit):
                 )
             train_len = len(df[dt_index < start_test_index])
-            test_len = len(df.loc[start_test_index:start_validation_index])
+            test_len = len(
+                dt_index[
+                    (dt_index >= start_test_index)
+                    & (dt_index <= start_validation_index)
+                ]
+            )
             last_processed_dt = None
             for count, test_idx in tqdm.tqdm(
@@ -341,7 +351,11 @@ class Trainer(Fit):
     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
         """Predict the expected values of the data."""
         feature_columns = df.columns.values
-        dt_index = df.index if self._dt_column is None else df[self._dt_column]
+        dt_index = (
+            df.index
+            if self._dt_column is None
+            else pd.DatetimeIndex(pd.to_datetime(df[self._dt_column]))
+        )
         for column in os.listdir(self._folder):
             column_path = os.path.join(self._folder, column)
@@ -353,6 +367,8 @@ class Trainer(Fit):
                 if not os.path.isdir(date_path):
                     continue
                 dates.append(datetime.datetime.fromisoformat(date_str))
+            if not dates:
+                raise ValueError(f"no dates found for {column}.")
             bins: list[datetime.datetime] = sorted(
                 [dt_index.min().to_pydatetime()]
                 + dates
@@ -371,7 +387,12 @@ class Trainer(Fit):
                 column: str,
                 dates: list[datetime.datetime],
             ) -> pd.DataFrame:
-                filtered_dates = [x for x in dates if x < group.index.max()]
+                group_dt_index = (
+                    group.index
+                    if self._dt_column is None
+                    else pd.DatetimeIndex(pd.to_datetime(group[self._dt_column]))
+                )
+                filtered_dates = [x for x in dates if x < group_dt_index.max()]
                 if not filtered_dates:
                     filtered_dates = [dates[-1]]
                 date_str = dates[-1].isoformat()

{wavetrainer-0.0.8 → wavetrainer-0.0.10/wavetrainer.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.0.8
+Version: 0.0.10
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/LICENSE RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/MANIFEST.in RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/README.md RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/requirements.txt RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/setup.cfg RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/tests/__init__.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/tests/model/__init__.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/tests/model/catboost_kwargs_test.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/calibrator/__init__.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/calibrator/calibrator.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/calibrator/calibrator_router.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/calibrator/mapie_calibrator.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/calibrator/vennabers_calibrator.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/create.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/exceptions.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/fit.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model/__init__.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model/catboost_classifier_wrap.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model/catboost_kwargs.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model/catboost_regressor_wrap.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model/model.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model/model_router.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model_type.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/params.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/reducer/__init__.py RENAMED Viewed

File without changes

{wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/reducer/combined_reducer.py RENAMED Viewed

@@ -27,8 +27,8 @@ class CombinedReducer(Reducer):
         self._reducers = [
             ConstantReducer(),
             DuplicateReducer(),
-            CorrelationReducer(),
             NonNumericReducer(),
+            CorrelationReducer(),
         ]
     @classmethod