wavetrainer 0.0.38__tar.gz → 0.0.39__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.0.38/wavetrainer.egg-info → wavetrainer-0.0.39}/PKG-INFO +1 -1
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/setup.py +1 -1
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/__init__.py +1 -1
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/catboost_model.py +3 -2
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/combined_reducer.py +6 -1
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +6 -3
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/trainer.py +17 -3
- {wavetrainer-0.0.38 → wavetrainer-0.0.39/wavetrainer.egg-info}/PKG-INFO +1 -1
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/LICENSE +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/MANIFEST.in +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/README.md +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/requirements.txt +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/setup.cfg +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/tests/__init__.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/tests/model/__init__.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/tests/model/catboost_kwargs_test.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/tests/trainer_test.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/calibrator/calibrator_router.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/calibrator/mapie_calibrator.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/create.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/catboost_kwargs.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/model.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/model_router.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/tabpfn_model.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model_type.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/params.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/base_selector_reducer.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/correlation_reducer.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/unseen_reducer.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/selector/selector.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/class_weights.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/combined_weights.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/exponential_weights.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/linear_weights.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/noop_weights.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/sigmoid_weights.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/weights_router.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/windower/windower.py +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer.egg-info/SOURCES.txt +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer.egg-info/requires.txt +0 -0
- {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
|
|
23
23
|
|
24
24
|
setup(
|
25
25
|
name='wavetrainer',
|
26
|
-
version='0.0.38',
|
26
|
+
version='0.0.39',
|
27
27
|
description='A library for automatically finding the optimal model within feature and hyperparameter space.',
|
28
28
|
long_description=long_description,
|
29
29
|
long_description_content_type='text/markdown',
|
@@ -148,7 +148,7 @@ class CatboostModel(Model):
|
|
148
148
|
)
|
149
149
|
catboost = self._provide_catboost()
|
150
150
|
catboost.save_model(os.path.join(folder, _MODEL_FILENAME))
|
151
|
-
trial.
|
151
|
+
trial.set_user_attr(_BEST_ITERATION_KEY, self._best_iteration)
|
152
152
|
|
153
153
|
def fit(
|
154
154
|
self,
|
@@ -219,9 +219,10 @@ class CatboostModel(Model):
|
|
219
219
|
best_iteration if best_iteration is not None else self._iterations
|
220
220
|
)
|
221
221
|
logging.info(
|
222
|
-
"Creating catboost model with depth %d, boosting type %s",
|
222
|
+
"Creating catboost model with depth %d, boosting type %s, best iteration %d",
|
223
223
|
self._depth,
|
224
224
|
self._boosting_type,
|
225
|
+
best_iteration,
|
225
226
|
)
|
226
227
|
match self._model_type:
|
227
228
|
case ModelType.BINARY:
|
@@ -3,6 +3,7 @@
|
|
3
3
|
import json
|
4
4
|
import logging
|
5
5
|
import os
|
6
|
+
import time
|
6
7
|
from typing import Self
|
7
8
|
|
8
9
|
import optuna
|
@@ -37,7 +38,7 @@ class CombinedReducer(Reducer):
|
|
37
38
|
DuplicateReducer(),
|
38
39
|
CorrelationReducer(),
|
39
40
|
SmartCorrelationReducer(),
|
40
|
-
SelectBySingleFeaturePerformanceReducer(),
|
41
|
+
# SelectBySingleFeaturePerformanceReducer(),
|
41
42
|
]
|
42
43
|
self._folder = None
|
43
44
|
|
@@ -99,12 +100,16 @@ class CombinedReducer(Reducer):
|
|
99
100
|
) -> Self:
|
100
101
|
removed_columns_dict = {}
|
101
102
|
for reducer in self._reducers:
|
103
|
+
start_reducer = time.time()
|
102
104
|
before_columns = set(df.columns.values)
|
103
105
|
df = reducer.fit_transform(df, y=y)
|
104
106
|
after_columns = set(df.columns.values)
|
105
107
|
removed_columns = before_columns.difference(after_columns)
|
106
108
|
if removed_columns:
|
107
109
|
removed_columns_dict[reducer.name()] = list(removed_columns)
|
110
|
+
logging.info(
|
111
|
+
"%s reducer took %f", reducer.name(), time.time() - start_reducer
|
112
|
+
)
|
108
113
|
if self._folder is not None:
|
109
114
|
with open(
|
110
115
|
os.path.join(self._folder, _REMOVED_COLUMNS_FILE), encoding="utf8"
|
@@ -1,5 +1,6 @@
|
|
1
1
|
"""A reducer that removes features by their single performance via further heuristics."""
|
2
2
|
|
3
|
+
# pylint: disable=too-many-arguments,too-many-positional-arguments
|
3
4
|
from typing import Self
|
4
5
|
|
5
6
|
import optuna
|
@@ -7,8 +8,8 @@ import pandas as pd
|
|
7
8
|
from feature_engine.selection import SelectBySingleFeaturePerformance
|
8
9
|
from sklearn.ensemble import RandomForestClassifier # type: ignore
|
9
10
|
|
10
|
-
from .base_selector_reducer import BaseSelectorReducer
|
11
11
|
from ..model_type import ModelType, determine_model_type
|
12
|
+
from .base_selector_reducer import BaseSelectorReducer
|
12
13
|
|
13
14
|
_SINGLE_FEATURE_PERFORMANCE_REDUCER_FILENAME = (
|
14
15
|
"single_feature_performance_reducer.joblib"
|
@@ -23,7 +24,7 @@ class SelectBySingleFeaturePerformanceReducer(BaseSelectorReducer):
|
|
23
24
|
|
24
25
|
def __init__(self) -> None:
|
25
26
|
self._singlefeatureperformance_selector = SelectBySingleFeaturePerformance(
|
26
|
-
RandomForestClassifier(random_state=42), scoring="accuracy"
|
27
|
+
RandomForestClassifier(random_state=42, n_jobs=-1), scoring="accuracy", cv=1
|
27
28
|
)
|
28
29
|
super().__init__(
|
29
30
|
self._singlefeatureperformance_selector,
|
@@ -53,5 +54,7 @@ class SelectBySingleFeaturePerformanceReducer(BaseSelectorReducer):
|
|
53
54
|
eval_x: pd.DataFrame | None = None,
|
54
55
|
eval_y: pd.Series | pd.DataFrame | None = None,
|
55
56
|
) -> Self:
|
56
|
-
self._singlefeatureperformance_selector.scoring =
|
57
|
+
self._singlefeatureperformance_selector.scoring = (
|
58
|
+
"r2" if determine_model_type(y) == ModelType.REGRESSION else "accuracy" # type: ignore
|
59
|
+
)
|
57
60
|
return super().fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y)
|
@@ -6,6 +6,7 @@ import json
|
|
6
6
|
import logging
|
7
7
|
import os
|
8
8
|
import pickle
|
9
|
+
import time
|
9
10
|
from typing import Self
|
10
11
|
|
11
12
|
import optuna
|
@@ -231,6 +232,7 @@ class Trainer(Fit):
|
|
231
232
|
|
232
233
|
try:
|
233
234
|
# Window the data
|
235
|
+
start_windower = time.time()
|
234
236
|
windower = Windower(self._dt_column)
|
235
237
|
windower.set_options(trial, x)
|
236
238
|
x_train = windower.fit_transform(x_train)
|
@@ -240,25 +242,31 @@ class Trainer(Fit):
|
|
240
242
|
os.removedirs(folder)
|
241
243
|
logging.warning("Y train only contains 1 unique datapoint.")
|
242
244
|
return _BAD_OUTPUT
|
245
|
+
logging.info("Windowing took %f", time.time() - start_windower)
|
243
246
|
|
244
247
|
# Perform common reductions
|
248
|
+
start_reducer = time.time()
|
245
249
|
reducer = CombinedReducer()
|
246
250
|
reducer.set_options(trial, x)
|
247
251
|
x_train = reducer.fit_transform(x_train, y=y_train)
|
248
252
|
x_test = reducer.transform(x_test)
|
253
|
+
logging.info("Reducing took %f", time.time() - start_reducer)
|
249
254
|
|
250
255
|
# Calculate the row weights
|
256
|
+
start_row_weights = time.time()
|
251
257
|
weights = CombinedWeights()
|
252
258
|
weights.set_options(trial, x)
|
253
259
|
w = weights.fit(x_train, y=y_train).transform(y_train.to_frame())[
|
254
260
|
WEIGHTS_COLUMN
|
255
261
|
]
|
262
|
+
logging.info("Row weights took %f", time.time() - start_row_weights)
|
256
263
|
|
257
264
|
# Create model
|
258
265
|
model = ModelRouter()
|
259
266
|
model.set_options(trial, x)
|
260
267
|
|
261
268
|
# Train
|
269
|
+
start_train = time.time()
|
262
270
|
selector = Selector(model)
|
263
271
|
selector.set_options(trial, x)
|
264
272
|
selector.fit(x_train, y=y_train, w=w, eval_x=x_test, eval_y=y_test)
|
@@ -267,11 +275,14 @@ class Trainer(Fit):
|
|
267
275
|
x_pred = model.fit_transform(
|
268
276
|
x_train, y=y_train, w=w, eval_x=x_test, eval_y=y_test
|
269
277
|
)
|
278
|
+
logging.info("Training took %f", time.time() - start_train)
|
270
279
|
|
271
280
|
# Calibrate
|
281
|
+
start_calibrate = time.time()
|
272
282
|
calibrator = CalibratorRouter(model)
|
273
283
|
calibrator.set_options(trial, x)
|
274
284
|
calibrator.fit(x_pred, y=y_train)
|
285
|
+
logging.info("Calibrating took %f", time.time() - start_calibrate)
|
275
286
|
|
276
287
|
# Output
|
277
288
|
y_pred = model.transform(x_test)
|
@@ -521,8 +532,11 @@ class Trainer(Fit):
|
|
521
532
|
date_path = os.path.join(column_path, date_str)
|
522
533
|
if not os.path.isdir(date_path):
|
523
534
|
continue
|
524
|
-
|
525
|
-
|
526
|
-
|
535
|
+
try:
|
536
|
+
model = ModelRouter()
|
537
|
+
model.load(date_path)
|
538
|
+
feature_importances[date_str] = model.feature_importances
|
539
|
+
except FileNotFoundError as exc:
|
540
|
+
logging.warning(str(exc))
|
527
541
|
|
528
542
|
return feature_importances
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/non_categorical_numeric_columns.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|