wavetrainer 0.1.11__tar.gz → 0.1.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.1.11/wavetrainer.egg-info → wavetrainer-0.1.12}/PKG-INFO +3 -1
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/README.md +1 -0
- wavetrainer-0.1.11/wavetrainer.egg-info/requires.txt → wavetrainer-0.1.12/requirements.txt +1 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/setup.py +1 -1
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/__init__.py +1 -1
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/lightgbm/lightgbm_model.py +1 -1
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/combined_reducer.py +1 -2
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/trainer.py +69 -12
- {wavetrainer-0.1.11 → wavetrainer-0.1.12/wavetrainer.egg-info}/PKG-INFO +3 -1
- wavetrainer-0.1.11/requirements.txt → wavetrainer-0.1.12/wavetrainer.egg-info/requires.txt +2 -1
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/LICENSE +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/MANIFEST.in +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/setup.cfg +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/tests/__init__.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/tests/model/__init__.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/tests/model/catboost_kwargs_test.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/tests/trainer_test.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/calibrator/calibrator_router.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/create.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/catboost/__init__.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/catboost/catboost_classifier_wrap.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/catboost/catboost_kwargs.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/catboost/catboost_model.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/catboost/catboost_regressor_wrap.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/lightgbm/__init__.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/model.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/model_router.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/tabpfn/__init__.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/tabpfn/tabpfn_model.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/xgboost/__init__.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/xgboost/early_stopper.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/xgboost/xgboost_logger.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/xgboost/xgboost_model.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model_type.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/params.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/base_selector_reducer.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/correlation_reducer.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/pca_reducer.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/unseen_reducer.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/selector/selector.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/class_weights.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/combined_weights.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/exponential_weights.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/linear_weights.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/noop_weights.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/sigmoid_weights.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/weights_router.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/windower/windower.py +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer.egg-info/SOURCES.txt +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: wavetrainer
|
3
|
-
Version: 0.1.11
|
3
|
+
Version: 0.1.12
|
4
4
|
Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
|
5
5
|
Home-page: https://github.com/8W9aG/wavetrainer
|
6
6
|
Author: Will Sackfield
|
@@ -30,6 +30,7 @@ Requires-Dist: tabpfn_extensions>=0.0.4
|
|
30
30
|
Requires-Dist: hyperopt>=0.2.7
|
31
31
|
Requires-Dist: pycaleva>=0.8.2
|
32
32
|
Requires-Dist: lightgbm>=4.6.0
|
33
|
+
Requires-Dist: kaleido>=0.2.1
|
33
34
|
|
34
35
|
# wavetrainer
|
35
36
|
|
@@ -66,6 +67,7 @@ Python 3.11.6:
|
|
66
67
|
- [hyperopt](https://github.com/hyperopt/hyperopt)
|
67
68
|
- [pycaleva](https://github.com/MartinWeigl/pycaleva)
|
68
69
|
- [lightgbm](https://github.com/microsoft/LightGBM)
|
70
|
+
- [kaleido](https://github.com/plotly/Kaleido)
|
69
71
|
|
70
72
|
## Raison D'être :thought_balloon:
|
71
73
|
|
@@ -33,6 +33,7 @@ Python 3.11.6:
|
|
33
33
|
- [hyperopt](https://github.com/hyperopt/hyperopt)
|
34
34
|
- [pycaleva](https://github.com/MartinWeigl/pycaleva)
|
35
35
|
- [lightgbm](https://github.com/microsoft/LightGBM)
|
36
|
+
- [kaleido](https://github.com/plotly/Kaleido)
|
36
37
|
|
37
38
|
## Raison D'être :thought_balloon:
|
38
39
|
|
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
|
|
23
23
|
|
24
24
|
setup(
|
25
25
|
name='wavetrainer',
|
26
|
-
version='0.1.11',
|
26
|
+
version='0.1.12',
|
27
27
|
description='A library for automatically finding the optimal model within feature and hyperparameter space.',
|
28
28
|
long_description=long_description,
|
29
29
|
long_description_content_type='text/markdown',
|
@@ -148,7 +148,7 @@ class LightGBMModel(Model):
|
|
148
148
|
|
149
149
|
eval_set = None
|
150
150
|
callbacks = []
|
151
|
-
if eval_x is None
|
151
|
+
if eval_x is not None and eval_y is not None:
|
152
152
|
eval_set = [(eval_x, eval_y.to_numpy().flatten())] # type: ignore
|
153
153
|
callbacks = [
|
154
154
|
lgb.early_stopping(stopping_rounds=early_stopping_rounds),
|
@@ -2,7 +2,6 @@
|
|
2
2
|
|
3
3
|
# pylint: disable=line-too-long
|
4
4
|
import json
|
5
|
-
import logging
|
6
5
|
import os
|
7
6
|
import time
|
8
7
|
from typing import Self
|
@@ -129,6 +128,6 @@ class CombinedReducer(Reducer):
|
|
129
128
|
try:
|
130
129
|
df = reducer.transform(df)
|
131
130
|
except ValueError as exc:
|
132
|
-
|
131
|
+
print("Failed to reduce %s", reducer.name())
|
133
132
|
raise exc
|
134
133
|
return df
|
@@ -1,5 +1,6 @@
|
|
1
1
|
"""The trainer class."""
|
2
2
|
|
3
|
+
# pylint: disable=line-too-long
|
3
4
|
import datetime
|
4
5
|
import functools
|
5
6
|
import json
|
@@ -12,12 +13,14 @@ from typing import Self
|
|
12
13
|
import optuna
|
13
14
|
import pandas as pd
|
14
15
|
import tqdm
|
15
|
-
from sklearn.metrics import f1_score
|
16
|
+
from sklearn.metrics import f1_score # type: ignore
|
17
|
+
from sklearn.metrics import (accuracy_score, brier_score_loss, log_loss,
|
18
|
+
precision_score, r2_score, recall_score)
|
16
19
|
|
17
20
|
from .calibrator.calibrator_router import CalibratorRouter
|
18
21
|
from .exceptions import WavetrainException
|
19
22
|
from .fit import Fit
|
20
|
-
from .model.model import PREDICTION_COLUMN
|
23
|
+
from .model.model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX
|
21
24
|
from .model.model_router import ModelRouter
|
22
25
|
from .model_type import ModelType, determine_model_type
|
23
26
|
from .reducer.combined_reducer import CombinedReducer
|
@@ -48,6 +51,11 @@ def _assign_bin(timestamp, bins: list[datetime.datetime]) -> int:
|
|
48
51
|
return len(bins) - 2 # Assign to last bin if at the end
|
49
52
|
|
50
53
|
|
54
|
+
def _best_trial(study: optuna.Study) -> optuna.trial.FrozenTrial:
|
55
|
+
best_brier = min(study.best_trials, key=lambda t: t.values[1])
|
56
|
+
return best_brier
|
57
|
+
|
58
|
+
|
51
59
|
class Trainer(Fit):
|
52
60
|
"""A class for training and predicting from an array of data."""
|
53
61
|
|
@@ -170,7 +178,10 @@ class Trainer(Fit):
|
|
170
178
|
storage=storage_name,
|
171
179
|
load_if_exists=True,
|
172
180
|
sampler=restored_sampler,
|
173
|
-
|
181
|
+
directions=[
|
182
|
+
optuna.study.StudyDirection.MAXIMIZE,
|
183
|
+
optuna.study.StudyDirection.MINIMIZE,
|
184
|
+
],
|
174
185
|
)
|
175
186
|
|
176
187
|
def fit(
|
@@ -210,7 +221,7 @@ class Trainer(Fit):
|
|
210
221
|
save: bool,
|
211
222
|
split_idx: datetime.datetime,
|
212
223
|
no_evaluation: bool,
|
213
|
-
) -> float:
|
224
|
+
) -> tuple[float, float]:
|
214
225
|
print(f"Beginning trial for: {split_idx.isoformat()}")
|
215
226
|
trial.set_user_attr(_IDX_USR_ATTR_KEY, split_idx.isoformat())
|
216
227
|
folder = os.path.join(
|
@@ -246,7 +257,7 @@ class Trainer(Fit):
|
|
246
257
|
if new_folder:
|
247
258
|
os.removedirs(folder)
|
248
259
|
logging.warning("Y train only contains 1 unique datapoint.")
|
249
|
-
return _BAD_OUTPUT
|
260
|
+
return _BAD_OUTPUT, -_BAD_OUTPUT
|
250
261
|
print(f"Windowing took {time.time() - start_windower}")
|
251
262
|
|
252
263
|
# Perform common reductions
|
@@ -311,10 +322,29 @@ class Trainer(Fit):
|
|
311
322
|
)
|
312
323
|
cal_pred[PREDICTION_COLUMN] = y_pred[PREDICTION_COLUMN]
|
313
324
|
output = 0.0
|
325
|
+
loss = 0.0
|
314
326
|
if determine_model_type(y_series) == ModelType.REGRESSION:
|
315
327
|
output = float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
|
328
|
+
print(f"R2: {output}")
|
316
329
|
else:
|
317
330
|
output = float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
|
331
|
+
print(f"F1: {output}")
|
332
|
+
prob_col = PROBABILITY_COLUMN_PREFIX + str(1)
|
333
|
+
if prob_col in y_pred.columns.values.tolist():
|
334
|
+
loss = float(brier_score_loss(y_test, y_pred[[prob_col]]))
|
335
|
+
print(f"Brier: {loss}")
|
336
|
+
print(
|
337
|
+
f"Log Loss: {float(log_loss(y_test.astype(float), y_pred[[prob_col]]))}"
|
338
|
+
)
|
339
|
+
print(
|
340
|
+
f"Accuracy: {float(accuracy_score(y_test, y_pred[[PREDICTION_COLUMN]]))}"
|
341
|
+
)
|
342
|
+
print(
|
343
|
+
f"Precision: {float(precision_score(y_test, y_pred[[PREDICTION_COLUMN]]))}"
|
344
|
+
)
|
345
|
+
print(
|
346
|
+
f"Recall: {float(recall_score(y_test, y_pred[[PREDICTION_COLUMN]]))}"
|
347
|
+
)
|
318
348
|
|
319
349
|
if save:
|
320
350
|
windower.save(folder, trial)
|
@@ -332,13 +362,13 @@ class Trainer(Fit):
|
|
332
362
|
handle,
|
333
363
|
)
|
334
364
|
|
335
|
-
return output
|
365
|
+
return output, loss
|
336
366
|
except WavetrainException as exc:
|
337
367
|
print(str(exc))
|
338
368
|
logging.warning(str(exc))
|
339
369
|
if new_folder:
|
340
370
|
os.removedirs(folder)
|
341
|
-
return _BAD_OUTPUT
|
371
|
+
return _BAD_OUTPUT, -_BAD_OUTPUT
|
342
372
|
|
343
373
|
start_validation_index = (
|
344
374
|
dt_index.to_list()[-int(len(dt_index) * self._validation_size) - 1]
|
@@ -359,7 +389,7 @@ class Trainer(Fit):
|
|
359
389
|
].to_list()[0]
|
360
390
|
)
|
361
391
|
|
362
|
-
def test_objective(trial: optuna.Trial) -> float:
|
392
|
+
def test_objective(trial: optuna.Trial) -> tuple[float, float]:
|
363
393
|
return _fit(
|
364
394
|
trial,
|
365
395
|
test_df,
|
@@ -382,7 +412,8 @@ class Trainer(Fit):
|
|
382
412
|
else self._max_train_timeout.total_seconds(),
|
383
413
|
)
|
384
414
|
while (
|
385
|
-
study.
|
415
|
+
_best_trial(study).values is None
|
416
|
+
or _best_trial(study).values == (_BAD_OUTPUT, -_BAD_OUTPUT)
|
386
417
|
) and len(study.trials) < 1000:
|
387
418
|
logging.info("Performing extra train")
|
388
419
|
study.optimize(
|
@@ -420,7 +451,7 @@ class Trainer(Fit):
|
|
420
451
|
if found:
|
421
452
|
last_processed_dt = test_dt
|
422
453
|
_fit(
|
423
|
-
study
|
454
|
+
_best_trial(study),
|
424
455
|
test_df.copy(),
|
425
456
|
test_series,
|
426
457
|
True,
|
@@ -441,7 +472,7 @@ class Trainer(Fit):
|
|
441
472
|
|
442
473
|
def validate_objctive(
|
443
474
|
trial: optuna.Trial, idx: datetime.datetime, series: pd.Series
|
444
|
-
) -> float:
|
475
|
+
) -> tuple[float, float]:
|
445
476
|
return _fit(trial, test_df.copy(), series, False, idx, False)
|
446
477
|
|
447
478
|
study.optimize(
|
@@ -457,10 +488,36 @@ class Trainer(Fit):
|
|
457
488
|
break
|
458
489
|
|
459
490
|
_fit(
|
460
|
-
study
|
491
|
+
_best_trial(study),
|
492
|
+
test_df.copy(),
|
493
|
+
test_series,
|
494
|
+
True,
|
495
|
+
test_idx,
|
496
|
+
True,
|
461
497
|
)
|
462
498
|
last_processed_dt = test_idx
|
463
499
|
|
500
|
+
target_names = ["F1", "Brier"]
|
501
|
+
fig = optuna.visualization.plot_pareto_front(
|
502
|
+
study, target_names=target_names
|
503
|
+
)
|
504
|
+
fig.write_image(
|
505
|
+
os.path.join(column_dir, "pareto_frontier.png"),
|
506
|
+
format="png",
|
507
|
+
width=800,
|
508
|
+
height=600,
|
509
|
+
)
|
510
|
+
for target_name in target_names:
|
511
|
+
fig = optuna.visualization.plot_param_importances(
|
512
|
+
study, target=lambda t: t.values[0], target_name=target_name
|
513
|
+
)
|
514
|
+
fig.write_image(
|
515
|
+
os.path.join(column_dir, f"{target_name}_frontier.png"),
|
516
|
+
format="png",
|
517
|
+
width=800,
|
518
|
+
height=600,
|
519
|
+
)
|
520
|
+
|
464
521
|
if isinstance(y, pd.Series):
|
465
522
|
_fit_column(y)
|
466
523
|
else:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: wavetrainer
|
3
|
-
Version: 0.1.11
|
3
|
+
Version: 0.1.12
|
4
4
|
Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
|
5
5
|
Home-page: https://github.com/8W9aG/wavetrainer
|
6
6
|
Author: Will Sackfield
|
@@ -30,6 +30,7 @@ Requires-Dist: tabpfn_extensions>=0.0.4
|
|
30
30
|
Requires-Dist: hyperopt>=0.2.7
|
31
31
|
Requires-Dist: pycaleva>=0.8.2
|
32
32
|
Requires-Dist: lightgbm>=4.6.0
|
33
|
+
Requires-Dist: kaleido>=0.2.1
|
33
34
|
|
34
35
|
# wavetrainer
|
35
36
|
|
@@ -66,6 +67,7 @@ Python 3.11.6:
|
|
66
67
|
- [hyperopt](https://github.com/hyperopt/hyperopt)
|
67
68
|
- [pycaleva](https://github.com/MartinWeigl/pycaleva)
|
68
69
|
- [lightgbm](https://github.com/microsoft/LightGBM)
|
70
|
+
- [kaleido](https://github.com/plotly/Kaleido)
|
69
71
|
|
70
72
|
## Raison D'être :thought_balloon:
|
71
73
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/catboost/catboost_classifier_wrap.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/catboost/catboost_regressor_wrap.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/non_categorical_numeric_columns.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|