wavetrainer 0.1.15.tar.gz → 0.1.16.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {wavetrainer-0.1.15/wavetrainer.egg-info → wavetrainer-0.1.16}/PKG-INFO +1 -1
  2. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/setup.py +1 -1
  3. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/create.py +2 -0
  4. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/combined_reducer.py +10 -3
  5. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/correlation_reducer.py +5 -2
  6. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/trainer.py +15 -4
  7. {wavetrainer-0.1.15 → wavetrainer-0.1.16/wavetrainer.egg-info}/PKG-INFO +1 -1
  8. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/LICENSE +0 -0
  9. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/MANIFEST.in +0 -0
  10. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/README.md +0 -0
  11. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/requirements.txt +0 -0
  12. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/setup.cfg +0 -0
  13. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/tests/__init__.py +0 -0
  14. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/tests/model/__init__.py +0 -0
  15. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/tests/model/catboost_kwargs_test.py +0 -0
  16. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/tests/trainer_test.py +0 -0
  17. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/__init__.py +0 -0
  18. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/calibrator/__init__.py +0 -0
  19. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/calibrator/calibrator.py +0 -0
  20. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/calibrator/calibrator_router.py +0 -0
  21. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
  22. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/exceptions.py +0 -0
  23. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/fit.py +0 -0
  24. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/__init__.py +0 -0
  25. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/catboost/__init__.py +0 -0
  26. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/catboost/catboost_classifier_wrap.py +0 -0
  27. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/catboost/catboost_kwargs.py +0 -0
  28. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/catboost/catboost_model.py +0 -0
  29. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/catboost/catboost_regressor_wrap.py +0 -0
  30. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/lightgbm/__init__.py +0 -0
  31. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/lightgbm/lightgbm_model.py +0 -0
  32. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/model.py +0 -0
  33. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/model_router.py +0 -0
  34. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/tabpfn/__init__.py +0 -0
  35. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/tabpfn/tabpfn_model.py +0 -0
  36. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/xgboost/__init__.py +0 -0
  37. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/xgboost/early_stopper.py +0 -0
  38. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/xgboost/xgboost_logger.py +0 -0
  39. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/xgboost/xgboost_model.py +0 -0
  40. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model_type.py +0 -0
  41. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/params.py +0 -0
  42. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/__init__.py +0 -0
  43. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/base_selector_reducer.py +0 -0
  44. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/constant_reducer.py +0 -0
  45. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  46. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
  47. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
  48. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/pca_reducer.py +0 -0
  49. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/reducer.py +0 -0
  50. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +0 -0
  51. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
  52. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/unseen_reducer.py +0 -0
  53. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/selector/__init__.py +0 -0
  54. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/selector/selector.py +0 -0
  55. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/__init__.py +0 -0
  56. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/class_weights.py +0 -0
  57. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/combined_weights.py +0 -0
  58. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/exponential_weights.py +0 -0
  59. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/linear_weights.py +0 -0
  60. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/noop_weights.py +0 -0
  61. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/sigmoid_weights.py +0 -0
  62. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/weights.py +0 -0
  63. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/weights_router.py +0 -0
  64. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/windower/__init__.py +0 -0
  65. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/windower/windower.py +0 -0
  66. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer.egg-info/SOURCES.txt +0 -0
  67. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer.egg-info/dependency_links.txt +0 -0
  68. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer.egg-info/not-zip-safe +0 -0
  69. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer.egg-info/requires.txt +0 -0
  70. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer.egg-info/top_level.txt +0 -0
wavetrainer.egg-info/PKG-INFO → PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.1.15
+Version: 0.1.16
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield
setup.py
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
 
 setup(
     name='wavetrainer',
-    version='0.1.15',
+    version='0.1.16',
     description='A library for automatically finding the optimal model within feature and hyperparameter space.',
     long_description=long_description,
     long_description_content_type='text/markdown',
wavetrainer/create.py
@@ -18,6 +18,7 @@ def create(
     embedding_cols: list[list[str]] | None = None,
     allowed_models: set[str] | None = None,
     max_false_positive_reduction_steps: int | None = None,
+    correlation_chunk_size: int | None = None,
 ) -> Trainer:
     """Create a trainer."""
     return Trainer(
@@ -31,4 +32,5 @@ def create(
         embedding_cols=embedding_cols,
         allowed_models=allowed_models,
         max_false_positive_reduction_steps=max_false_positive_reduction_steps,
+        correlation_chunk_size=correlation_chunk_size,
     )
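
In practice the new keyword simply flows from create() through to the Trainer. A minimal usage sketch follows; it assumes create() is re-exported from the package root, the required positional arguments of create() are not part of this diff (so "model_dir" below is only a placeholder), and the chunk size is an arbitrary example value.

import wavetrainer as wt  # assumes create() is re-exported from the package root

# "model_dir" is a placeholder for whatever required arguments create() takes;
# they are not shown in this diff.
trainer = wt.create(
    "model_dir",
    correlation_chunk_size=1000,  # forwarded to the correlation reducer
)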
wavetrainer/reducer/combined_reducer.py
@@ -31,20 +31,25 @@ class CombinedReducer(Reducer):
     # pylint: disable=too-many-positional-arguments,too-many-arguments
     _folder: str | None
 
-    def __init__(self, embedding_cols: list[list[str]] | None):
+    def __init__(
+        self, embedding_cols: list[list[str]] | None, correlation_chunk_size: int | None
+    ):
         super().__init__()
+        if correlation_chunk_size is None:
+            correlation_chunk_size = 500
         self._reducers = [
             UnseenReducer(),
             NonNumericReducer(),
             PCAReducer(embedding_cols),
             ConstantReducer(),
             DuplicateReducer(),
-            CorrelationReducer(),
+            CorrelationReducer(correlation_chunk_size=correlation_chunk_size),
             SmartCorrelationReducer(),
             # SelectBySingleFeaturePerformanceReducer(),
         ]
         self._folder = None
         self._embedding_cols = embedding_cols
+        self._correlation_chunk_size = correlation_chunk_size
 
     @classmethod
     def name(cls) -> str:
@@ -68,7 +73,9 @@ class CombinedReducer(Reducer):
         elif reducer_name == DuplicateReducer.name():
             self._reducers.append(DuplicateReducer())
         elif reducer_name == CorrelationReducer.name():
-            self._reducers.append(CorrelationReducer())
+            self._reducers.append(
+                CorrelationReducer(self._correlation_chunk_size)
+            )
         elif reducer_name == NonNumericReducer.name():
             self._reducers.append(NonNumericReducer())
         elif reducer_name == UnseenReducer.name():
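
The notable behavioral detail here is the fallback: when correlation_chunk_size is None the constructor substitutes 500 before forwarding the value to CorrelationReducer, so callers that never specify a chunk size still get a bounded default. A minimal sketch of the two call patterns, assuming the import path from the file list (the Trainer normally constructs this reducer itself):

from wavetrainer.reducer.combined_reducer import CombinedReducer

# None falls back to the default chunk size of 500 inside __init__.
reducer_default = CombinedReducer(embedding_cols=None, correlation_chunk_size=None)

# An explicit value is forwarded as CorrelationReducer(correlation_chunk_size=250).
reducer_tuned = CombinedReducer(embedding_cols=None, correlation_chunk_size=250)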
wavetrainer/reducer/correlation_reducer.py
@@ -82,9 +82,10 @@ class CorrelationReducer(Reducer):
 
     _correlation_drop_features: dict[str, bool]
 
-    def __init__(self) -> None:
+    def __init__(self, correlation_chunk_size: int) -> None:
         self._threshold = 0.0
         self._correlation_drop_features = {}
+        self._correlation_chunk_size = correlation_chunk_size
 
     @classmethod
     def name(cls) -> str:
@@ -116,7 +117,9 @@ class CorrelationReducer(Reducer):
         eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         drop_features = _get_correlated_features_to_drop_chunked(
-            df, threshold=self._threshold
+            df.copy(),
+            threshold=self._threshold,
+            chunk_size=self._correlation_chunk_size,
         )
         self._correlation_drop_features = {x: True for x in drop_features}
         return self
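
The chunked helper, _get_correlated_features_to_drop_chunked, is not shown in this diff; only its call site changes, receiving a copy of the frame and the configured chunk size. Purely as an illustration of the general technique (not the library's actual implementation), a chunked correlation-based drop can look like this:

import pandas as pd

def chunked_corr_drop_sketch(
    df: pd.DataFrame, threshold: float, chunk_size: int
) -> list[str]:
    """Greedily mark the right-hand column of each highly correlated pair,
    scanning chunk_size columns at a time to bound the correlation matrix."""
    cols = list(df.columns)
    to_drop: set[str] = set()
    for start in range(0, len(cols), chunk_size):
        chunk = cols[start : start + chunk_size]
        corr = df[chunk].corr().abs()  # at most chunk_size x chunk_size
        for i, left in enumerate(chunk):
            if left in to_drop:
                continue
            for right in chunk[i + 1 :]:
                if right not in to_drop and corr.at[left, right] > threshold:
                    to_drop.add(right)
    return sorted(to_drop)

# Note: this sketch only compares columns within a chunk; a complete
# implementation also has to account for correlations across chunk boundaries.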
wavetrainer/trainer.py
@@ -42,6 +42,7 @@ _VALIDATION_SIZE_KEY = "validation_size"
 _IDX_USR_ATTR_KEY = "idx"
 _DT_COLUMN_KEY = "dt_column"
 _MAX_FALSE_POSITIVE_REDUCTION_STEPS_KEY = "max_false_positive_reduction_steps"
+_CORRELATION_CHUNK_SIZE_KEY = "correlation_chunk_size"
 _BAD_OUTPUT = -1000.0
 
 
@@ -75,6 +76,7 @@ class Trainer(Fit):
         embedding_cols: list[list[str]] | None = None,
         allowed_models: set[str] | None = None,
         max_false_positive_reduction_steps: int | None = None,
+        correlation_chunk_size: int | None = None,
     ):
         tqdm.tqdm.pandas()
 
@@ -129,6 +131,8 @@ class Trainer(Fit):
                 max_false_positive_reduction_steps = params.get(
                     _MAX_FALSE_POSITIVE_REDUCTION_STEPS_KEY
                 )
+                if correlation_chunk_size is None:
+                    correlation_chunk_size = params.get(_CORRELATION_CHUNK_SIZE_KEY)
         else:
             with open(params_file, "w", encoding="utf8") as handle:
                 validation_size_value = None
@@ -160,6 +164,7 @@ class Trainer(Fit):
                         _VALIDATION_SIZE_KEY: validation_size_value,
                         _DT_COLUMN_KEY: dt_column,
                         _MAX_FALSE_POSITIVE_REDUCTION_STEPS_KEY: max_false_positive_reduction_steps,
+                        _CORRELATION_CHUNK_SIZE_KEY: correlation_chunk_size,
                     },
                     handle,
                 )
@@ -173,6 +178,7 @@ class Trainer(Fit):
         self.embedding_cols = embedding_cols
         self._allowed_models = allowed_models
         self._max_false_positive_reduction_steps = max_false_positive_reduction_steps
+        self._correlation_chunk_size = correlation_chunk_size
 
     def _provide_study(self, column: str) -> optuna.Study:
         storage_name = f"sqlite:///{self._folder}/{column}/{_STUDYDB_FILENAME}"
@@ -246,7 +252,8 @@ class Trainer(Fit):
                     "Found trial %d previously executed, skipping...",
                     trial.number,
                 )
-                return trial_info["output"]
+                return tuple(trial_info["output"])
+            print("Retraining for different trial number.")
 
             train_dt_index = dt_index[: len(x)]
             x_train = x[train_dt_index < split_idx] # type: ignore
@@ -270,7 +277,9 @@ class Trainer(Fit):
 
             # Perform common reductions
             start_reducer = time.time()
-            reducer = CombinedReducer(self.embedding_cols)
+            reducer = CombinedReducer(
+                self.embedding_cols, self._correlation_chunk_size
+            )
             reducer.set_options(trial, x)
             x_train = reducer.fit_transform(x_train, y=y_train)
             x_test = reducer.transform(x_test)
@@ -367,7 +376,7 @@ class Trainer(Fit):
                 json.dump(
                     {
                         "number": trial.number,
-                        "output": output,
+                        "output": [output, loss],
                     },
                     handle,
                 )
@@ -583,7 +592,9 @@ class Trainer(Fit):
             date_str = dates[-1].isoformat()
             folder = os.path.join(column_path, date_str)
 
-            reducer = CombinedReducer(self.embedding_cols)
+            reducer = CombinedReducer(
+                self.embedding_cols, self._correlation_chunk_size
+            )
             reducer.load(folder)
 
             model = ModelRouter(None, None)
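
Two smaller changes ride along in trainer.py: the cached trial result is now written as the list [output, loss] and converted back with tuple(...) when a previously executed trial is reused, and a print statement flags the retrain path. The tuple conversion matters because JSON has no tuple type, so the cached value round-trips as a list. A tiny sketch of that round-trip (values are hypothetical):

import json

record = {"number": 7, "output": [0.83, 0.41]}   # written by json.dump
restored = json.loads(json.dumps(record))        # read back from the cache file
cached_result = tuple(restored["output"])        # (0.83, 0.41)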
PKG-INFO → wavetrainer.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.1.15
+Version: 0.1.16
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield