PyPI - wavetrainer - Versions diffs - 0.1.17__tar.gz → 0.1.19__tar.gz - Mend

wavetrainer 0.1.17.tar.gz → 0.1.19.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

{wavetrainer-0.1.17/wavetrainer.egg-info → wavetrainer-0.1.19}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.1.17
+Version: 0.1.19
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield

{wavetrainer-0.1.17 → wavetrainer-0.1.19}/setup.py RENAMED Viewed

@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
 setup(
     name='wavetrainer',
-    version='0.1.17',
+    version='0.1.19',
     description='A library for automatically finding the optimal model within feature and hyperparameter space.',
     long_description=long_description,
     long_description_content_type='text/markdown',

{wavetrainer-0.1.17 → wavetrainer-0.1.19}/tests/trainer_test.py RENAMED Viewed

@@ -40,7 +40,7 @@ class TestTrainer(unittest.TestCase):
     def test_trainer_dt_column(self):
         with tempfile.TemporaryDirectory() as tmpdir:
-            trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=5, dt_column="dt_column")
+            trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=5, dt_column="dt_column", allowed_models={"catboost"})
             x_data = [i for i in range(100)]
             x_index = [datetime.datetime(2022, 1, 1) + datetime.timedelta(days=i) for i in range(len(x_data))]
             df = pd.DataFrame(
@@ -55,6 +55,7 @@ class TestTrainer(unittest.TestCase):
                 },
                 index=df.index,
             )
+            y["y"] = y["y"].astype(bool)
             trainer.fit(df, y=y)
             df = trainer.transform(df)
             print("df:")

{wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/__init__.py RENAMED Viewed

@@ -2,5 +2,5 @@
 from .create import create
-__VERSION__ = "0.1.17"
+__VERSION__ = "0.1.19"
 __all__ = ("create",)

{wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/calibrator/vennabers_calibrator.py RENAMED Viewed

@@ -5,6 +5,7 @@ import os
 from typing import Self
 import joblib  # type: ignore
+import numpy as np
 import optuna
 import pandas as pd
 from venn_abers import VennAbers  # type: ignore
@@ -55,18 +56,26 @@ class VennabersCalibrator(Calibrator):
             raise ValueError("vennabers is null")
         if y is None:
             raise ValueError("y is null")
-        prob_columns = [
-            x for x in df.columns.values if x.startswith(PROBABILITY_COLUMN_PREFIX)
-        ]
+        prob_columns = sorted(
+            [x for x in df.columns.values if x.startswith(PROBABILITY_COLUMN_PREFIX)]
+        )
+        probs = df[prob_columns].to_numpy()
         try:
-            vennabers.fit(df[prob_columns].to_numpy(), y.to_numpy())
+            vennabers.fit(probs, y.to_numpy())
         except IndexError:
             logging.error(df)
             raise
         return self
     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
-        p_prime, _ = self._vennabers.predict_proba(df.to_numpy())
+        prob_columns = sorted(
+            [x for x in df.columns.values if x.startswith(PROBABILITY_COLUMN_PREFIX)]
+        )
+        probs = df[prob_columns].to_numpy()
+        p_prime, _ = self._vennabers.predict_proba(probs)
+        if np.mean(p_prime[:, 1] > 0.5) > 0.5 and np.mean(probs[:, 0] > 0.5) > 0.5:
+            print("⚠️ Warning: calibration seems inverted — flipping p_prime")
+            p_prime = p_prime[:, ::-1]
         for i in range(p_prime.shape[1]):
             prob = p_prime[:, i]
             df[f"{PROBABILITY_COLUMN_PREFIX}{i}"] = prob

{wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/combined_reducer.py RENAMED Viewed

@@ -41,8 +41,8 @@ class CombinedReducer(Reducer):
             UnseenReducer(),
             NonNumericReducer(),
             PCAReducer(embedding_cols),
-            ConstantReducer(),
-            DuplicateReducer(),
+            # ConstantReducer(),
+            # DuplicateReducer(),
             CorrelationReducer(correlation_chunk_size=correlation_chunk_size),
             SmartCorrelationReducer(),
             # SelectBySingleFeaturePerformanceReducer(),

{wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/correlation_reducer.py RENAMED Viewed

@@ -8,7 +8,6 @@ from typing import Self
 import numpy as np
 import optuna
 import pandas as pd
-from memory_profiler import profile  # type: ignore
 from .non_categorical_numeric_columns import \
     find_non_categorical_numeric_columns
@@ -18,7 +17,6 @@ _CORRELATION_REDUCER_FILENAME = "correlation_reducer.json"
 _CORRELATION_REDUCER_THRESHOLD = "correlation_reducer_threshold"
-@profile
 def _get_correlated_features_to_drop_chunked(
     df: pd.DataFrame,
     threshold: float = 0.85,
@@ -119,7 +117,7 @@ class CorrelationReducer(Reducer):
         eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         drop_features = _get_correlated_features_to_drop_chunked(
-            df.copy(),
+            df,
             threshold=self._threshold,
             chunk_size=self._correlation_chunk_size,
         )

{wavetrainer-0.1.17 → wavetrainer-0.1.19/wavetrainer.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.1.17
+Version: 0.1.19
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield