wavetrainer 0.0.36__tar.gz → 0.0.38__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.0.36/wavetrainer.egg-info → wavetrainer-0.0.38}/PKG-INFO +1 -1
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/setup.py +1 -1
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/__init__.py +1 -1
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/catboost_model.py +3 -1
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/tabpfn_model.py +7 -1
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/base_selector_reducer.py +9 -3
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/combined_reducer.py +6 -1
- wavetrainer-0.0.38/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +57 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/smart_correlation_reducer.py +4 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/trainer.py +1 -1
- {wavetrainer-0.0.36 → wavetrainer-0.0.38/wavetrainer.egg-info}/PKG-INFO +1 -1
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer.egg-info/SOURCES.txt +1 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/LICENSE +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/MANIFEST.in +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/README.md +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/requirements.txt +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/setup.cfg +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/tests/__init__.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/tests/model/__init__.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/tests/model/catboost_kwargs_test.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/tests/trainer_test.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/calibrator/calibrator_router.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/calibrator/mapie_calibrator.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/create.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/catboost_kwargs.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/model.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model/model_router.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/model_type.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/params.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/correlation_reducer.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/unseen_reducer.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/selector/selector.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/class_weights.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/combined_weights.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/exponential_weights.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/linear_weights.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/noop_weights.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/sigmoid_weights.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/weights/weights_router.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/windower/windower.py +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer.egg-info/requires.txt +0 -0
- {wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
|
|
23
23
|
|
24
24
|
setup(
|
25
25
|
name='wavetrainer',
|
26
|
-
version='0.0.36',
|
26
|
+
version='0.0.38',
|
27
27
|
description='A library for automatically finding the optimal model within feature and hyperparameter space.',
|
28
28
|
long_description=long_description,
|
29
29
|
long_description_content_type='text/markdown',
|
@@ -175,9 +175,11 @@ class CatboostModel(Model):
|
|
175
175
|
label=eval_y,
|
176
176
|
cat_features=eval_x.select_dtypes(include="category").columns.tolist(),
|
177
177
|
)
|
178
|
-
if eval_x is not None
|
178
|
+
if eval_x is not None
|
179
179
|
else None
|
180
180
|
)
|
181
|
+
if self._best_iteration is not None:
|
182
|
+
eval_pool = None
|
181
183
|
catboost.fit(
|
182
184
|
train_pool,
|
183
185
|
early_stopping_rounds=self._early_stopping_rounds,
|
@@ -2,6 +2,7 @@
|
|
2
2
|
# pylint: disable=duplicate-code,too-many-arguments,too-many-positional-arguments
|
3
3
|
|
4
4
|
import json
|
5
|
+
import logging
|
5
6
|
import os
|
6
7
|
import pickle
|
7
8
|
from typing import Any, Self
|
@@ -13,6 +14,7 @@ import torch
|
|
13
14
|
from tabpfn_extensions.post_hoc_ensembles.sklearn_interface import ( # type: ignore
|
14
15
|
AutoTabPFNClassifier, AutoTabPFNRegressor)
|
15
16
|
|
17
|
+
from ..exceptions import WavetrainException
|
16
18
|
from ..model_type import ModelType, determine_model_type
|
17
19
|
from .model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
|
18
20
|
|
@@ -104,7 +106,11 @@ class TabPFNModel(Model):
|
|
104
106
|
raise ValueError("y is null.")
|
105
107
|
self._model_type = determine_model_type(y)
|
106
108
|
tabpfn = self._provide_tabpfn()
|
107
|
-
|
109
|
+
try:
|
110
|
+
tabpfn.fit(df, y)
|
111
|
+
except ValueError as exc:
|
112
|
+
logging.warning(str(exc))
|
113
|
+
raise WavetrainException() from exc
|
108
114
|
return self
|
109
115
|
|
110
116
|
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
@@ -56,8 +56,8 @@ class BaseSelectorReducer(Reducer):
|
|
56
56
|
if len(df.columns) <= 1:
|
57
57
|
return self
|
58
58
|
try:
|
59
|
-
self._base_selector.fit(df) # type: ignore
|
60
|
-
except ValueError as exc:
|
59
|
+
self._base_selector.fit(df, y=y) # type: ignore
|
60
|
+
except (ValueError, AttributeError) as exc:
|
61
61
|
logging.warning(str(exc))
|
62
62
|
if self.should_raise():
|
63
63
|
raise WavetrainException() from exc
|
@@ -66,4 +66,10 @@ class BaseSelectorReducer(Reducer):
|
|
66
66
|
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
67
67
|
if len(df.columns) <= 1:
|
68
68
|
return df
|
69
|
-
|
69
|
+
try:
|
70
|
+
return self._base_selector.transform(df)
|
71
|
+
except (ValueError, AttributeError) as exc:
|
72
|
+
logging.warning(str(exc))
|
73
|
+
if self.should_raise():
|
74
|
+
raise WavetrainException() from exc
|
75
|
+
return df
|
@@ -13,6 +13,8 @@ from .correlation_reducer import CorrelationReducer
|
|
13
13
|
from .duplicate_reducer import DuplicateReducer
|
14
14
|
from .nonnumeric_reducer import NonNumericReducer
|
15
15
|
from .reducer import Reducer
|
16
|
+
from .select_by_single_feature_performance_reducer import \
|
17
|
+
SelectBySingleFeaturePerformanceReducer
|
16
18
|
from .smart_correlation_reducer import SmartCorrelationReducer
|
17
19
|
from .unseen_reducer import UnseenReducer
|
18
20
|
|
@@ -35,6 +37,7 @@ class CombinedReducer(Reducer):
|
|
35
37
|
DuplicateReducer(),
|
36
38
|
CorrelationReducer(),
|
37
39
|
SmartCorrelationReducer(),
|
40
|
+
SelectBySingleFeaturePerformanceReducer(),
|
38
41
|
]
|
39
42
|
self._folder = None
|
40
43
|
|
@@ -67,6 +70,8 @@ class CombinedReducer(Reducer):
|
|
67
70
|
self._reducers.append(UnseenReducer())
|
68
71
|
elif reducer_name == SmartCorrelationReducer.name():
|
69
72
|
self._reducers.append(SmartCorrelationReducer())
|
73
|
+
elif reducer_name == SelectBySingleFeaturePerformanceReducer.name():
|
74
|
+
self._reducers.append(SelectBySingleFeaturePerformanceReducer())
|
70
75
|
for reducer in self._reducers:
|
71
76
|
reducer.load(folder)
|
72
77
|
self._folder = folder
|
@@ -95,7 +100,7 @@ class CombinedReducer(Reducer):
|
|
95
100
|
removed_columns_dict = {}
|
96
101
|
for reducer in self._reducers:
|
97
102
|
before_columns = set(df.columns.values)
|
98
|
-
df = reducer.fit_transform(df)
|
103
|
+
df = reducer.fit_transform(df, y=y)
|
99
104
|
after_columns = set(df.columns.values)
|
100
105
|
removed_columns = before_columns.difference(after_columns)
|
101
106
|
if removed_columns:
|
@@ -0,0 +1,57 @@
|
|
1
|
+
"""A reducer that removes features by their single performance via further heuristics."""
|
2
|
+
|
3
|
+
from typing import Self
|
4
|
+
|
5
|
+
import optuna
|
6
|
+
import pandas as pd
|
7
|
+
from feature_engine.selection import SelectBySingleFeaturePerformance
|
8
|
+
from sklearn.ensemble import RandomForestClassifier # type: ignore
|
9
|
+
|
10
|
+
from .base_selector_reducer import BaseSelectorReducer
|
11
|
+
from ..model_type import ModelType, determine_model_type
|
12
|
+
|
13
|
+
_SINGLE_FEATURE_PERFORMANCE_REDUCER_FILENAME = (
|
14
|
+
"single_feature_performance_reducer.joblib"
|
15
|
+
)
|
16
|
+
_SINGLE_FEATURE_PERFORMANCE_REDUCER_THRESHOLD = (
|
17
|
+
"single_feature_performance_reducer_threshold"
|
18
|
+
)
|
19
|
+
|
20
|
+
|
21
|
+
class SelectBySingleFeaturePerformanceReducer(BaseSelectorReducer):
|
22
|
+
"""A class that removes smart correlated values from a dataset."""
|
23
|
+
|
24
|
+
def __init__(self) -> None:
|
25
|
+
self._singlefeatureperformance_selector = SelectBySingleFeaturePerformance(
|
26
|
+
RandomForestClassifier(random_state=42), scoring="accuracy"
|
27
|
+
)
|
28
|
+
super().__init__(
|
29
|
+
self._singlefeatureperformance_selector,
|
30
|
+
_SINGLE_FEATURE_PERFORMANCE_REDUCER_FILENAME,
|
31
|
+
)
|
32
|
+
|
33
|
+
@classmethod
|
34
|
+
def name(cls) -> str:
|
35
|
+
return "single_feature_performance"
|
36
|
+
|
37
|
+
@classmethod
|
38
|
+
def should_raise(cls) -> bool:
|
39
|
+
return False
|
40
|
+
|
41
|
+
def set_options(
|
42
|
+
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
43
|
+
) -> None:
|
44
|
+
self._singlefeatureperformance_selector.threshold = trial.suggest_float(
|
45
|
+
_SINGLE_FEATURE_PERFORMANCE_REDUCER_THRESHOLD, 0.1, 0.9
|
46
|
+
)
|
47
|
+
|
48
|
+
def fit(
|
49
|
+
self,
|
50
|
+
df: pd.DataFrame,
|
51
|
+
y: pd.Series | pd.DataFrame | None = None,
|
52
|
+
w: pd.Series | None = None,
|
53
|
+
eval_x: pd.DataFrame | None = None,
|
54
|
+
eval_y: pd.Series | pd.DataFrame | None = None,
|
55
|
+
) -> Self:
|
56
|
+
self._singlefeatureperformance_selector.scoring = "r2" if determine_model_type(y) == ModelType.REGRESSION else "accuracy"
|
57
|
+
return super().fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y)
|
@@ -29,6 +29,10 @@ class SmartCorrelationReducer(BaseSelectorReducer):
|
|
29
29
|
def name(cls) -> str:
|
30
30
|
return "smart_correlation"
|
31
31
|
|
32
|
+
@classmethod
|
33
|
+
def should_raise(cls) -> bool:
|
34
|
+
return False
|
35
|
+
|
32
36
|
def set_options(
|
33
37
|
self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
|
34
38
|
) -> None:
|
@@ -244,7 +244,7 @@ class Trainer(Fit):
|
|
244
244
|
# Perform common reductions
|
245
245
|
reducer = CombinedReducer()
|
246
246
|
reducer.set_options(trial, x)
|
247
|
-
x_train = reducer.fit_transform(x_train)
|
247
|
+
x_train = reducer.fit_transform(x_train, y=y_train)
|
248
248
|
x_test = reducer.transform(x_test)
|
249
249
|
|
250
250
|
# Calculate the row weights
|
@@ -42,6 +42,7 @@ wavetrainer/reducer/duplicate_reducer.py
|
|
42
42
|
wavetrainer/reducer/non_categorical_numeric_columns.py
|
43
43
|
wavetrainer/reducer/nonnumeric_reducer.py
|
44
44
|
wavetrainer/reducer/reducer.py
|
45
|
+
wavetrainer/reducer/select_by_single_feature_performance_reducer.py
|
45
46
|
wavetrainer/reducer/smart_correlation_reducer.py
|
46
47
|
wavetrainer/reducer/unseen_reducer.py
|
47
48
|
wavetrainer/selector/__init__.py
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.0.36 → wavetrainer-0.0.38}/wavetrainer/reducer/non_categorical_numeric_columns.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|