PyPI - wavetrainer - Versions diffs - 0.0.28__tar.gz → 0.0.30__tar.gz - Mend

wavetrainer 0.0.28.tar.gz → 0.0.30.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60)

{wavetrainer-0.0.28/wavetrainer.egg-info → wavetrainer-0.0.30}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.0.28
+Version: 0.0.30
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield

{wavetrainer-0.0.28 → wavetrainer-0.0.30}/setup.py RENAMED Viewed

@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
 setup(
     name='wavetrainer',
-    version='0.0.28',
+    version='0.0.30',
     description='A library for automatically finding the optimal model within feature and hyperparameter space.',
     long_description=long_description,
     long_description_content_type='text/markdown',

{wavetrainer-0.0.28 → wavetrainer-0.0.30}/tests/trainer_test.py RENAMED Viewed

@@ -58,4 +58,4 @@ class TestTrainer(unittest.TestCase):
             trainer.fit(df, y=y)
             df = trainer.transform(df)
             print("df:")
-            print(df)
+            print(df)

{wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/__init__.py RENAMED Viewed

@@ -2,5 +2,5 @@
 from .create import create
-__VERSION__ = "0.0.28"
+__VERSION__ = "0.0.30"
 __all__ = ("create",)

{wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/catboost_model.py RENAMED Viewed

@@ -72,6 +72,16 @@ class CatboostModel(Model):
     def supports_importances(self) -> bool:
         return True
+    @property
+    def feature_importances(self) -> dict[str, float]:
+        catboost = self._provide_catboost()
+        importances = catboost.get_feature_importance(prettified=True)
+        if importances is None:
+            raise ValueError("importances is null")
+        feature_ids = importances["Feature Id"].to_list()  # type: ignore
+        importances = importances["Importances"].to_list()  # type: ignore
+        return {feature_ids[x]: importances[x] for x in range(len(feature_ids))}
     def pre_fit(
         self,
         df: pd.DataFrame,
@@ -165,7 +175,7 @@ class CatboostModel(Model):
                 label=eval_y,
                 cat_features=eval_x.select_dtypes(include="category").columns.tolist(),
             )
-            if eval_x is not None
+            if eval_x is not None and self._best_iteration is not None
             else None
         )
         catboost.fit(

{wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/model.py RENAMED Viewed

@@ -37,6 +37,13 @@ class Model(Params, Fit):
             "supports_importances not implemented in parent class."
         )
+    @property
+    def feature_importances(self) -> dict[str, float]:
+        """The feature importances of this model."""
+        raise NotImplementedError(
+            "feature_importances not implemented in parent class."
+        )
     def pre_fit(
         self,
         df: pd.DataFrame,

{wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/model_router.py RENAMED Viewed

@@ -52,6 +52,13 @@ class ModelRouter(Model):
             raise ValueError("model is null")
         return model.supports_importances
+    @property
+    def feature_importances(self) -> dict[str, float]:
+        model = self._model
+        if model is None:
+            raise ValueError("model is null")
+        return model.feature_importances
     def pre_fit(
         self,
         df: pd.DataFrame,

{wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/tabpfn_model.py RENAMED Viewed

@@ -48,6 +48,10 @@ class TabPFNModel(Model):
     def supports_importances(self) -> bool:
         return False
+    @property
+    def feature_importances(self) -> dict[str, float]:
+        return {}
     def pre_fit(
         self,
         df: pd.DataFrame,

{wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/trainer.py RENAMED Viewed

@@ -28,6 +28,7 @@ from .windower.windower import Windower
 _SAMPLER_FILENAME = "sampler.pkl"
 _STUDYDB_FILENAME = "study.db"
 _PARAMS_FILENAME = "params.json"
+_TRIAL_FILENAME = "trial.json"
 _TRIALS_KEY = "trials"
 _WALKFORWARD_TIMEDELTA_KEY = "walkforward_timedelta"
 _DAYS_KEY = "days"
@@ -38,6 +39,13 @@ _IDX_USR_ATTR_KEY = "idx"
 _DT_COLUMN_KEY = "dt_column"
+def _assign_bin(timestamp, bins: list[datetime.datetime]) -> int:
+    for i in range(len(bins) - 1):
+        if bins[i] <= timestamp < bins[i + 1]:
+            return i
+    return len(bins) - 2  # Assign to last bin if at the end
 class Trainer(Fit):
     """A class for training and predicting from an array of data."""
@@ -198,6 +206,20 @@ class Trainer(Fit):
             ) -> float:
                 print(f"Beginning trial for: {split_idx.isoformat()}")
                 trial.set_user_attr(_IDX_USR_ATTR_KEY, split_idx.isoformat())
+                folder = os.path.join(
+                    self._folder, str(y_series.name), split_idx.isoformat()
+                )
+                os.makedirs(folder, exist_ok=True)
+                trial_file = os.path.join(folder, _TRIAL_FILENAME)
+                if os.path.exists(trial_file):
+                    with open(trial_file, encoding="utf8") as handle:
+                        trial_info = json.load(handle)
+                        if trial_info["number"] == trial.number:
+                            logging.info(
+                                "Found trial %d previously executed, skipping...",
+                                trial.number,
+                            )
+                            return trial_info["output"]
                 train_dt_index = dt_index[: len(x)]
                 x_train = x[train_dt_index < split_idx]  # type: ignore
@@ -247,24 +269,32 @@ class Trainer(Fit):
                     calibrator.set_options(trial, x)
                     calibrator.fit(x_pred, y=y_train)
+                    # Output
+                    y_pred = model.transform(x_test)
+                    y_pred = calibrator.transform(y_pred)
+                    output = 0.0
+                    if determine_model_type(y_series) == ModelType.REGRESSION:
+                        output = float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
+                    else:
+                        output = float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
                     if save:
-                        folder = os.path.join(
-                            self._folder, str(y_series.name), split_idx.isoformat()
-                        )
-                        if not os.path.exists(folder):
-                            os.mkdir(folder)
                         windower.save(folder, trial)
                         reducer.save(folder, trial)
                         weights.save(folder, trial)
                         model.save(folder, trial)
                         selector.save(folder, trial)
                         calibrator.save(folder, trial)
-                    y_pred = model.transform(x_test)
-                    y_pred = calibrator.transform(y_pred)
-                    if determine_model_type(y_series) == ModelType.REGRESSION:
-                        return float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
-                    return float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
+                        with open(trial_file, "w", encoding="utf8") as handle:
+                            json.dump(
+                                {
+                                    "number": trial.number,
+                                    "output": output,
+                                },
+                                handle,
+                            )
+                    return output
                 except WavetrainException as exc:
                     logging.warning(str(exc))
                     return -1.0
@@ -403,12 +433,6 @@ class Trainer(Fit):
                 + [(dt_index.max() + pd.Timedelta(days=1)).to_pydatetime()]
             )
-            def assign_bin(timestamp, bins: list[datetime.datetime]) -> int:
-                for i in range(len(bins) - 1):
-                    if bins[i] <= timestamp < bins[i + 1]:
-                        return i
-                return len(bins) - 2  # Assign to last bin if at the end
             def perform_predictions(
                 group: pd.DataFrame,
                 column_path: str,
@@ -448,7 +472,7 @@ class Trainer(Fit):
             old_index = dt_index.copy()
             df = df.groupby(
-                dt_index.map(functools.partial(assign_bin, bins=bins))
+                dt_index.map(functools.partial(_assign_bin, bins=bins))
             ).progress_apply(  # type: ignore
                 functools.partial(
                     perform_predictions,
@@ -466,3 +490,21 @@ class Trainer(Fit):
             df[col] = input_df[col]
         return df
+    def feature_importances(self) -> dict[str, dict[str, float]]:
+        """Find the feature importances for the rolling models."""
+        feature_importances = {}
+        for column in os.listdir(self._folder):
+            column_path = os.path.join(self._folder, column)
+            if not os.path.isdir(column_path):
+                continue
+            for date_str in os.listdir(column_path):
+                date_path = os.path.join(column_path, date_str)
+                if not os.path.isdir(date_path):
+                    continue
+                model = ModelRouter()
+                model.load(date_path)
+                feature_importances[date_str] = model.feature_importances
+        return feature_importances

{wavetrainer-0.0.28 → wavetrainer-0.0.30/wavetrainer.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.0.28
+Version: 0.0.30
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield