wavetrainer 0.0.25__tar.gz → 0.0.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.0.25/wavetrainer.egg-info → wavetrainer-0.0.26}/PKG-INFO +1 -1
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/setup.py +1 -1
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/__init__.py +1 -1
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/create.py +2 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/combined_reducer.py +4 -3
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/pca_reducer.py +27 -10
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/trainer.py +7 -2
- {wavetrainer-0.0.25 → wavetrainer-0.0.26/wavetrainer.egg-info}/PKG-INFO +1 -1
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/LICENSE +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/MANIFEST.in +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/README.md +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/requirements.txt +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/setup.cfg +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/tests/__init__.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/tests/model/__init__.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/tests/model/catboost_kwargs_test.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/tests/trainer_test.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/calibrator/calibrator_router.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/calibrator/mapie_calibrator.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/catboost_kwargs.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/catboost_model.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/model.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/model_router.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/tabpfn_model.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model_type.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/params.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/base_selector_reducer.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/correlation_reducer.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/unseen_reducer.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/selector/selector.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/class_weights.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/combined_weights.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/exponential_weights.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/linear_weights.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/noop_weights.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/sigmoid_weights.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/weights_router.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/windower/windower.py +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer.egg-info/SOURCES.txt +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer.egg-info/requires.txt +0 -0
- {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
|
|
23
23
|
|
24
24
|
setup(
|
25
25
|
name='wavetrainer',
|
26
|
-
version='0.0.25',
|
26
|
+
version='0.0.26',
|
27
27
|
description='A library for automatically finding the optimal model within feature and hyperparameter space.',
|
28
28
|
long_description=long_description,
|
29
29
|
long_description_content_type='text/markdown',
|
@@ -15,6 +15,7 @@ def create(
|
|
15
15
|
dt_column: str | None = None,
|
16
16
|
max_train_timeout: datetime.timedelta | None = None,
|
17
17
|
cutoff_dt: datetime.datetime | None = None,
|
18
|
+
max_features: int | None = None,
|
18
19
|
) -> Trainer:
|
19
20
|
"""Create a trainer."""
|
20
21
|
return Trainer(
|
@@ -25,4 +26,5 @@ def create(
|
|
25
26
|
dt_column=dt_column,
|
26
27
|
max_train_timeout=max_train_timeout,
|
27
28
|
cutoff_dt=cutoff_dt,
|
29
|
+
max_features=max_features,
|
28
30
|
)
|
@@ -25,15 +25,16 @@ class CombinedReducer(Reducer):
|
|
25
25
|
|
26
26
|
# pylint: disable=too-many-positional-arguments,too-many-arguments
|
27
27
|
|
28
|
-
def __init__(self):
|
28
|
+
def __init__(self, max_features: int | None):
|
29
29
|
super().__init__()
|
30
|
+
self._max_features = max_features
|
30
31
|
self._reducers = [
|
31
32
|
UnseenReducer(),
|
32
33
|
NonNumericReducer(),
|
33
34
|
ConstantReducer(),
|
34
35
|
DuplicateReducer(),
|
35
36
|
CorrelationReducer(),
|
36
|
-
PCAReducer(),
|
37
|
+
PCAReducer(max_features),
|
37
38
|
]
|
38
39
|
|
39
40
|
@classmethod
|
@@ -62,7 +63,7 @@ class CombinedReducer(Reducer):
|
|
62
63
|
elif reducer_name == UnseenReducer.name():
|
63
64
|
self._reducers.append(UnseenReducer())
|
64
65
|
elif reducer_name == PCAReducer.name():
|
65
|
-
self._reducers.append(PCAReducer())
|
66
|
+
self._reducers.append(PCAReducer(self._max_features))
|
66
67
|
for reducer in self._reducers:
|
67
68
|
reducer.load(folder)
|
68
69
|
|
@@ -20,10 +20,15 @@ class PCAReducer(Reducer):
|
|
20
20
|
|
21
21
|
# pylint: disable=too-many-positional-arguments,too-many-arguments
|
22
22
|
|
23
|
-
def __init__(self):
|
23
|
+
def __init__(self, max_features: int | None):
|
24
24
|
super().__init__()
|
25
|
-
self.
|
26
|
-
|
25
|
+
self._max_features = max_features
|
26
|
+
if max_features is not None:
|
27
|
+
self._scaler = StandardScaler()
|
28
|
+
self._pca = PCA(n_components=max_features)
|
29
|
+
else:
|
30
|
+
self._scaler = None
|
31
|
+
self._pca = None
|
27
32
|
|
28
33
|
@classmethod
|
29
34
|
def name(cls) -> str:
|
@@ -33,12 +38,18 @@ class PCAReducer(Reducer):
|
|
33
38
|
pass
|
34
39
|
|
35
40
|
def load(self, folder: str) -> None:
|
36
|
-
|
37
|
-
|
41
|
+
pca_scaler_file = os.path.join(folder, _PCA_SCALER_FILE)
|
42
|
+
pca_file = os.path.join(folder, _PCA_FILE)
|
43
|
+
if os.path.exists(pca_scaler_file):
|
44
|
+
self._scaler = joblib.load(pca_scaler_file)
|
45
|
+
if os.path.exists(pca_file):
|
46
|
+
self._pca = joblib.load(pca_file)
|
38
47
|
|
39
48
|
def save(self, folder: str) -> None:
|
40
|
-
|
41
|
-
|
49
|
+
if self._scaler is not None:
|
50
|
+
joblib.dump(self._scaler, os.path.join(folder, _PCA_SCALER_FILE))
|
51
|
+
if self._pca is not None:
|
52
|
+
joblib.dump(self._pca, os.path.join(folder, _PCA_FILE))
|
42
53
|
|
43
54
|
def fit(
|
44
55
|
self,
|
@@ -48,13 +59,19 @@ class PCAReducer(Reducer):
|
|
48
59
|
eval_x: pd.DataFrame | None = None,
|
49
60
|
eval_y: pd.Series | pd.DataFrame | None = None,
|
50
61
|
) -> Self:
|
51
|
-
|
62
|
+
pca = self._pca
|
63
|
+
scaler = self._scaler
|
64
|
+
if pca is None or scaler is None:
|
65
|
+
return self
|
66
|
+
if len(df.columns.values) < pca.n_components: # type: ignore
|
52
67
|
return self
|
53
|
-
x_scaled =
|
54
|
-
|
68
|
+
x_scaled = scaler.fit_transform(df)
|
69
|
+
pca.fit(x_scaled)
|
55
70
|
return self
|
56
71
|
|
57
72
|
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
73
|
+
if self._pca is None:
|
74
|
+
return df
|
58
75
|
if len(df.columns.values) < self._pca.n_components: # type: ignore
|
59
76
|
return df
|
60
77
|
return self._pca.transform(df)
|
@@ -36,6 +36,7 @@ _TEST_SIZE_KEY = "test_size"
|
|
36
36
|
_VALIDATION_SIZE_KEY = "validation_size"
|
37
37
|
_IDX_USR_ATTR_KEY = "idx"
|
38
38
|
_DT_COLUMN_KEY = "dt_column"
|
39
|
+
_MAX_FEATURES_KEY = "max_features"
|
39
40
|
|
40
41
|
|
41
42
|
class Trainer(Fit):
|
@@ -53,6 +54,7 @@ class Trainer(Fit):
|
|
53
54
|
dt_column: str | None = None,
|
54
55
|
max_train_timeout: datetime.timedelta | None = None,
|
55
56
|
cutoff_dt: datetime.datetime | None = None,
|
57
|
+
max_features: int | None = None,
|
56
58
|
):
|
57
59
|
tqdm.tqdm.pandas()
|
58
60
|
|
@@ -103,6 +105,7 @@ class Trainer(Fit):
|
|
103
105
|
)
|
104
106
|
if dt_column is None:
|
105
107
|
dt_column = params[_DT_COLUMN_KEY]
|
108
|
+
max_features = params.get(_MAX_FEATURES_KEY)
|
106
109
|
else:
|
107
110
|
with open(params_file, "w", encoding="utf8") as handle:
|
108
111
|
validation_size_value = None
|
@@ -133,6 +136,7 @@ class Trainer(Fit):
|
|
133
136
|
_TEST_SIZE_KEY: test_size_value,
|
134
137
|
_VALIDATION_SIZE_KEY: validation_size_value,
|
135
138
|
_DT_COLUMN_KEY: dt_column,
|
139
|
+
_MAX_FEATURES_KEY: max_features,
|
136
140
|
},
|
137
141
|
handle,
|
138
142
|
)
|
@@ -143,6 +147,7 @@ class Trainer(Fit):
|
|
143
147
|
self._dt_column = dt_column
|
144
148
|
self._max_train_timeout = max_train_timeout
|
145
149
|
self._cutoff_dt = cutoff_dt
|
150
|
+
self._max_features = max_features
|
146
151
|
|
147
152
|
def _provide_study(self, column: str) -> optuna.Study:
|
148
153
|
storage_name = f"sqlite:///{self._folder}/{column}/{_STUDYDB_FILENAME}"
|
@@ -216,7 +221,7 @@ class Trainer(Fit):
|
|
216
221
|
return -1.0
|
217
222
|
|
218
223
|
# Perform common reductions
|
219
|
-
reducer = CombinedReducer()
|
224
|
+
reducer = CombinedReducer(self._max_features)
|
220
225
|
reducer.set_options(trial)
|
221
226
|
x_train = reducer.fit_transform(x_train)
|
222
227
|
x_test = reducer.transform(x_test)
|
@@ -425,7 +430,7 @@ class Trainer(Fit):
|
|
425
430
|
date_str = dates[-1].isoformat()
|
426
431
|
folder = os.path.join(column_path, date_str)
|
427
432
|
|
428
|
-
reducer = CombinedReducer()
|
433
|
+
reducer = CombinedReducer(self._max_features)
|
429
434
|
reducer.load(folder)
|
430
435
|
|
431
436
|
model = ModelRouter()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|