wavetrainer 0.0.36__tar.gz → 0.0.38__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {wavetrainer-0.0.36/wavetrainer.egg-info → wavetrainer-0.0.38}/PKG-INFO +1 -1
  2. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/setup.py +1 -1
  3. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/__init__.py +1 -1
  4. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/catboost_model.py +3 -1
  5. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/tabpfn_model.py +7 -1
  6. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/base_selector_reducer.py +9 -3
  7. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/combined_reducer.py +6 -1
  8. wavetrainer-0.0.38/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +57 -0
  9. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/smart_correlation_reducer.py +4 -0
  10. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/trainer.py +1 -1
  11. {wavetrainer-0.0.36 → wavetrainer-0.0.38/wavetrainer.egg-info}/PKG-INFO +1 -1
  12. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer.egg-info/SOURCES.txt +1 -0
  13. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/LICENSE +0 -0
  14. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/MANIFEST.in +0 -0
  15. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/README.md +0 -0
  16. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/requirements.txt +0 -0
  17. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/setup.cfg +0 -0
  18. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/tests/__init__.py +0 -0
  19. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/tests/model/__init__.py +0 -0
  20. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/tests/model/catboost_kwargs_test.py +0 -0
  21. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/tests/trainer_test.py +0 -0
  22. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/calibrator/__init__.py +0 -0
  23. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/calibrator/calibrator.py +0 -0
  24. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/calibrator/calibrator_router.py +0 -0
  25. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/calibrator/mapie_calibrator.py +0 -0
  26. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
  27. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/create.py +0 -0
  28. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/exceptions.py +0 -0
  29. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/fit.py +0 -0
  30. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/__init__.py +0 -0
  31. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
  32. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/catboost_kwargs.py +0 -0
  33. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
  34. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/model.py +0 -0
  35. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/model_router.py +0 -0
  36. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model_type.py +0 -0
  37. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/params.py +0 -0
  38. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/__init__.py +0 -0
  39. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/constant_reducer.py +0 -0
  40. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/correlation_reducer.py +0 -0
  41. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  42. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
  43. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
  44. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/reducer.py +0 -0
  45. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/unseen_reducer.py +0 -0
  46. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/selector/__init__.py +0 -0
  47. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/selector/selector.py +0 -0
  48. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/__init__.py +0 -0
  49. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/class_weights.py +0 -0
  50. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/combined_weights.py +0 -0
  51. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/exponential_weights.py +0 -0
  52. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/linear_weights.py +0 -0
  53. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/noop_weights.py +0 -0
  54. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/sigmoid_weights.py +0 -0
  55. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/weights.py +0 -0
  56. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/weights_router.py +0 -0
  57. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/windower/__init__.py +0 -0
  58. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/windower/windower.py +0 -0
  59. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer.egg-info/dependency_links.txt +0 -0
  60. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer.egg-info/not-zip-safe +0 -0
  61. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer.egg-info/requires.txt +0 -0
  62. {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.36
3
+ Version: 0.0.38
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
23
23
 
24
24
  setup(
25
25
  name='wavetrainer',
26
- version='0.0.36',
26
+ version='0.0.38',
27
27
  description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
28
  long_description=long_description,
29
29
  long_description_content_type='text/markdown',
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .create import create
4
4
 
5
- __VERSION__ = "0.0.36"
5
+ __VERSION__ = "0.0.38"
6
6
  __all__ = ("create",)
@@ -175,9 +175,11 @@ class CatboostModel(Model):
175
175
  label=eval_y,
176
176
  cat_features=eval_x.select_dtypes(include="category").columns.tolist(),
177
177
  )
178
- if eval_x is not None and self._best_iteration is not None
178
+ if eval_x is not None
179
179
  else None
180
180
  )
181
+ if self._best_iteration is not None:
182
+ eval_pool = None
181
183
  catboost.fit(
182
184
  train_pool,
183
185
  early_stopping_rounds=self._early_stopping_rounds,
@@ -2,6 +2,7 @@
2
2
  # pylint: disable=duplicate-code,too-many-arguments,too-many-positional-arguments
3
3
 
4
4
  import json
5
+ import logging
5
6
  import os
6
7
  import pickle
7
8
  from typing import Any, Self
@@ -13,6 +14,7 @@ import torch
13
14
  from tabpfn_extensions.post_hoc_ensembles.sklearn_interface import ( # type: ignore
14
15
  AutoTabPFNClassifier, AutoTabPFNRegressor)
15
16
 
17
+ from ..exceptions import WavetrainException
16
18
  from ..model_type import ModelType, determine_model_type
17
19
  from .model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
18
20
 
@@ -104,7 +106,11 @@ class TabPFNModel(Model):
104
106
  raise ValueError("y is null.")
105
107
  self._model_type = determine_model_type(y)
106
108
  tabpfn = self._provide_tabpfn()
107
- tabpfn.fit(df, y)
109
+ try:
110
+ tabpfn.fit(df, y)
111
+ except ValueError as exc:
112
+ logging.warning(str(exc))
113
+ raise WavetrainException() from exc
108
114
  return self
109
115
 
110
116
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -56,8 +56,8 @@ class BaseSelectorReducer(Reducer):
56
56
  if len(df.columns) <= 1:
57
57
  return self
58
58
  try:
59
- self._base_selector.fit(df) # type: ignore
60
- except ValueError as exc:
59
+ self._base_selector.fit(df, y=y) # type: ignore
60
+ except (ValueError, AttributeError) as exc:
61
61
  logging.warning(str(exc))
62
62
  if self.should_raise():
63
63
  raise WavetrainException() from exc
@@ -66,4 +66,10 @@ class BaseSelectorReducer(Reducer):
66
66
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
67
67
  if len(df.columns) <= 1:
68
68
  return df
69
- return self._base_selector.transform(df)
69
+ try:
70
+ return self._base_selector.transform(df)
71
+ except (ValueError, AttributeError) as exc:
72
+ logging.warning(str(exc))
73
+ if self.should_raise():
74
+ raise WavetrainException() from exc
75
+ return df
@@ -13,6 +13,8 @@ from .correlation_reducer import CorrelationReducer
13
13
  from .duplicate_reducer import DuplicateReducer
14
14
  from .nonnumeric_reducer import NonNumericReducer
15
15
  from .reducer import Reducer
16
+ from .select_by_single_feature_performance_reducer import \
17
+ SelectBySingleFeaturePerformanceReducer
16
18
  from .smart_correlation_reducer import SmartCorrelationReducer
17
19
  from .unseen_reducer import UnseenReducer
18
20
 
@@ -35,6 +37,7 @@ class CombinedReducer(Reducer):
35
37
  DuplicateReducer(),
36
38
  CorrelationReducer(),
37
39
  SmartCorrelationReducer(),
40
+ SelectBySingleFeaturePerformanceReducer(),
38
41
  ]
39
42
  self._folder = None
40
43
 
@@ -67,6 +70,8 @@ class CombinedReducer(Reducer):
67
70
  self._reducers.append(UnseenReducer())
68
71
  elif reducer_name == SmartCorrelationReducer.name():
69
72
  self._reducers.append(SmartCorrelationReducer())
73
+ elif reducer_name == SelectBySingleFeaturePerformanceReducer.name():
74
+ self._reducers.append(SelectBySingleFeaturePerformanceReducer())
70
75
  for reducer in self._reducers:
71
76
  reducer.load(folder)
72
77
  self._folder = folder
@@ -95,7 +100,7 @@ class CombinedReducer(Reducer):
95
100
  removed_columns_dict = {}
96
101
  for reducer in self._reducers:
97
102
  before_columns = set(df.columns.values)
98
- df = reducer.fit_transform(df)
103
+ df = reducer.fit_transform(df, y=y)
99
104
  after_columns = set(df.columns.values)
100
105
  removed_columns = before_columns.difference(after_columns)
101
106
  if removed_columns:
@@ -0,0 +1,57 @@
1
+ """A reducer that removes features by their single performance via further heuristics."""
2
+
3
+ from typing import Self
4
+
5
+ import optuna
6
+ import pandas as pd
7
+ from feature_engine.selection import SelectBySingleFeaturePerformance
8
+ from sklearn.ensemble import RandomForestClassifier # type: ignore
9
+
10
+ from .base_selector_reducer import BaseSelectorReducer
11
+ from ..model_type import ModelType, determine_model_type
12
+
13
+ _SINGLE_FEATURE_PERFORMANCE_REDUCER_FILENAME = (
14
+ "single_feature_performance_reducer.joblib"
15
+ )
16
+ _SINGLE_FEATURE_PERFORMANCE_REDUCER_THRESHOLD = (
17
+ "single_feature_performance_reducer_threshold"
18
+ )
19
+
20
+
21
+ class SelectBySingleFeaturePerformanceReducer(BaseSelectorReducer):
22
+ """A reducer wrapping feature_engine's SelectBySingleFeaturePerformance (random-forest estimator, trial-tuned threshold)."""
23
+
24
+ def __init__(self) -> None:
25
+ self._singlefeatureperformance_selector = SelectBySingleFeaturePerformance(
26
+ RandomForestClassifier(random_state=42), scoring="accuracy"
27
+ )
28
+ super().__init__(
29
+ self._singlefeatureperformance_selector,
30
+ _SINGLE_FEATURE_PERFORMANCE_REDUCER_FILENAME,
31
+ )
32
+
33
+ @classmethod
34
+ def name(cls) -> str:
35
+ return "single_feature_performance"
36
+
37
+ @classmethod
38
+ def should_raise(cls) -> bool:
39
+ return False
40
+
41
+ def set_options(
42
+ self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
43
+ ) -> None:
44
+ self._singlefeatureperformance_selector.threshold = trial.suggest_float(
45
+ _SINGLE_FEATURE_PERFORMANCE_REDUCER_THRESHOLD, 0.1, 0.9
46
+ )
47
+
48
+ def fit(
49
+ self,
50
+ df: pd.DataFrame,
51
+ y: pd.Series | pd.DataFrame | None = None,
52
+ w: pd.Series | None = None,
53
+ eval_x: pd.DataFrame | None = None,
54
+ eval_y: pd.Series | pd.DataFrame | None = None,
55
+ ) -> Self:
56
+ self._singlefeatureperformance_selector.scoring = "r2" if determine_model_type(y) == ModelType.REGRESSION else "accuracy"  # NOTE(review): estimator stays RandomForestClassifier even when scoring becomes "r2" — confirm a regressor is not needed
57
+ return super().fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y)
@@ -29,6 +29,10 @@ class SmartCorrelationReducer(BaseSelectorReducer):
29
29
  def name(cls) -> str:
30
30
  return "smart_correlation"
31
31
 
32
+ @classmethod
33
+ def should_raise(cls) -> bool:
34
+ return False
35
+
32
36
  def set_options(
33
37
  self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
34
38
  ) -> None:
@@ -244,7 +244,7 @@ class Trainer(Fit):
244
244
  # Perform common reductions
245
245
  reducer = CombinedReducer()
246
246
  reducer.set_options(trial, x)
247
- x_train = reducer.fit_transform(x_train)
247
+ x_train = reducer.fit_transform(x_train, y=y_train)
248
248
  x_test = reducer.transform(x_test)
249
249
 
250
250
  # Calculate the row weights
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.36
3
+ Version: 0.0.38
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -42,6 +42,7 @@ wavetrainer/reducer/duplicate_reducer.py
42
42
  wavetrainer/reducer/non_categorical_numeric_columns.py
43
43
  wavetrainer/reducer/nonnumeric_reducer.py
44
44
  wavetrainer/reducer/reducer.py
45
+ wavetrainer/reducer/select_by_single_feature_performance_reducer.py
45
46
  wavetrainer/reducer/smart_correlation_reducer.py
46
47
  wavetrainer/reducer/unseen_reducer.py
47
48
  wavetrainer/selector/__init__.py
File without changes
File without changes
File without changes
File without changes