wavetrainer 0.1.7__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.1.7/wavetrainer.egg-info → wavetrainer-0.1.9}/PKG-INFO +3 -1
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/README.md +1 -0
- wavetrainer-0.1.7/wavetrainer.egg-info/requires.txt → wavetrainer-0.1.9/requirements.txt +1 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/setup.py +1 -1
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/__init__.py +1 -1
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/calibrator/calibrator_router.py +32 -1
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/create.py +2 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/model_router.py +10 -2
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/trainer.py +5 -3
- {wavetrainer-0.1.7 → wavetrainer-0.1.9/wavetrainer.egg-info}/PKG-INFO +3 -1
- wavetrainer-0.1.7/requirements.txt → wavetrainer-0.1.9/wavetrainer.egg-info/requires.txt +2 -1
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/LICENSE +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/MANIFEST.in +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/setup.cfg +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/tests/__init__.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/tests/model/__init__.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/tests/model/catboost_kwargs_test.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/tests/trainer_test.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/catboost/__init__.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/catboost/catboost_classifier_wrap.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/catboost/catboost_kwargs.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/catboost/catboost_model.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/catboost/catboost_regressor_wrap.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/model.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/tabpfn/__init__.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/tabpfn/tabpfn_model.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/xgboost/__init__.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/xgboost/early_stopper.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/xgboost/xgboost_logger.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/xgboost/xgboost_model.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model_type.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/params.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/base_selector_reducer.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/combined_reducer.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/correlation_reducer.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/pca_reducer.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/unseen_reducer.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/selector/selector.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/class_weights.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/combined_weights.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/exponential_weights.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/linear_weights.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/noop_weights.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/sigmoid_weights.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/weights_router.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/windower/windower.py +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer.egg-info/SOURCES.txt +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: wavetrainer
|
3
|
-
Version: 0.1.7
|
3
|
+
Version: 0.1.9
|
4
4
|
Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
|
5
5
|
Home-page: https://github.com/8W9aG/wavetrainer
|
6
6
|
Author: Will Sackfield
|
@@ -28,6 +28,7 @@ Requires-Dist: xgboost>=3.0.0
|
|
28
28
|
Requires-Dist: jax>=0.6.1
|
29
29
|
Requires-Dist: tabpfn_extensions>=0.0.4
|
30
30
|
Requires-Dist: hyperopt>=0.2.7
|
31
|
+
Requires-Dist: pycaleva>=0.8.2
|
31
32
|
|
32
33
|
# wavetrainer
|
33
34
|
|
@@ -62,6 +63,7 @@ Python 3.11.6:
|
|
62
63
|
- [jax](https://github.com/jax-ml/jax)
|
63
64
|
- [tabpfn_extensions](https://github.com/PriorLabs/tabpfn-extensions)
|
64
65
|
- [hyperopt](https://github.com/hyperopt/hyperopt)
|
66
|
+
- [pycaleva](https://github.com/MartinWeigl/pycaleva)
|
65
67
|
|
66
68
|
## Raison D'être :thought_balloon:
|
67
69
|
|
@@ -31,6 +31,7 @@ Python 3.11.6:
|
|
31
31
|
- [jax](https://github.com/jax-ml/jax)
|
32
32
|
- [tabpfn_extensions](https://github.com/PriorLabs/tabpfn-extensions)
|
33
33
|
- [hyperopt](https://github.com/hyperopt/hyperopt)
|
34
|
+
- [pycaleva](https://github.com/MartinWeigl/pycaleva)
|
34
35
|
|
35
36
|
## Raison D'être :thought_balloon:
|
36
37
|
|
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
|
|
23
23
|
|
24
24
|
setup(
|
25
25
|
name='wavetrainer',
|
26
|
-
version='0.1.7',
|
26
|
+
version='0.1.9',
|
27
27
|
description='A library for automatically finding the optimal model within feature and hyperparameter space.',
|
28
28
|
long_description=long_description,
|
29
29
|
long_description_content_type='text/markdown',
|
@@ -5,10 +5,12 @@ import logging
|
|
5
5
|
import os
|
6
6
|
from typing import Self
|
7
7
|
|
8
|
+
import numpy as np
|
8
9
|
import optuna
|
9
10
|
import pandas as pd
|
11
|
+
from pycaleva import CalibrationEvaluator # type: ignore
|
10
12
|
|
11
|
-
from ..model.model import Model
|
13
|
+
from ..model.model import PROBABILITY_COLUMN_PREFIX, Model
|
12
14
|
from ..model_type import ModelType, determine_model_type
|
13
15
|
from .calibrator import Calibrator
|
14
16
|
from .vennabers_calibrator import VennabersCalibrator
|
@@ -26,10 +28,12 @@ class CalibratorRouter(Calibrator):
|
|
26
28
|
# pylint: disable=too-many-positional-arguments,too-many-arguments
|
27
29
|
|
28
30
|
_calibrator: Calibrator | None
|
31
|
+
_ce: CalibrationEvaluator | None
|
29
32
|
|
30
33
|
def __init__(self, model: Model):
|
31
34
|
super().__init__(model)
|
32
35
|
self._calibrator = None
|
36
|
+
self._ce = None
|
33
37
|
|
34
38
|
@classmethod
|
35
39
|
def name(cls) -> str:
|
@@ -75,6 +79,14 @@ class CalibratorRouter(Calibrator):
|
|
75
79
|
},
|
76
80
|
handle,
|
77
81
|
)
|
82
|
+
ce = self._ce
|
83
|
+
if ce is not None:
|
84
|
+
try:
|
85
|
+
ce.calibration_report(
|
86
|
+
os.path.join(folder, "calibration.pdf"), "binary-classifier"
|
87
|
+
)
|
88
|
+
except ValueError as exc:
|
89
|
+
logging.warning(str(exc))
|
78
90
|
|
79
91
|
def fit(
|
80
92
|
self,
|
@@ -94,6 +106,25 @@ class CalibratorRouter(Calibrator):
|
|
94
106
|
calibrator = VennabersCalibrator(self._model)
|
95
107
|
calibrator.fit(df, y=y, w=w)
|
96
108
|
self._calibrator = calibrator
|
109
|
+
|
110
|
+
pred_prob = calibrator.transform(df)
|
111
|
+
pred_prob = pred_prob.drop(
|
112
|
+
columns=[
|
113
|
+
x
|
114
|
+
for x in pred_prob.columns.values.tolist()
|
115
|
+
if not x.startswith(PROBABILITY_COLUMN_PREFIX)
|
116
|
+
],
|
117
|
+
errors="ignore",
|
118
|
+
)
|
119
|
+
ce = CalibrationEvaluator(
|
120
|
+
y.to_numpy(),
|
121
|
+
np.max(pred_prob.to_numpy(), axis=1),
|
122
|
+
outsample=True,
|
123
|
+
n_groups="auto",
|
124
|
+
)
|
125
|
+
print(f"Hosmer Lemeshow: {ce.hosmerlemeshow()}")
|
126
|
+
self._ce = ce
|
127
|
+
|
97
128
|
return self
|
98
129
|
|
99
130
|
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
@@ -16,6 +16,7 @@ def create(
|
|
16
16
|
max_train_timeout: datetime.timedelta | None = None,
|
17
17
|
cutoff_dt: datetime.datetime | None = None,
|
18
18
|
embedding_cols: list[list[str]] | None = None,
|
19
|
+
allowed_models: set[str] | None = None,
|
19
20
|
) -> Trainer:
|
20
21
|
"""Create a trainer."""
|
21
22
|
return Trainer(
|
@@ -27,4 +28,5 @@ def create(
|
|
27
28
|
max_train_timeout=max_train_timeout,
|
28
29
|
cutoff_dt=cutoff_dt,
|
29
30
|
embedding_cols=embedding_cols,
|
31
|
+
allowed_models=allowed_models,
|
30
32
|
)
|
@@ -33,10 +33,13 @@ class ModelRouter(Model):
|
|
33
33
|
_model: Model | None
|
34
34
|
_false_positive_reduction_steps: int | None
|
35
35
|
|
36
|
-
def __init__(self) -> None:
|
36
|
+
def __init__(self, allowed_models: set[str] | None) -> None:
|
37
37
|
super().__init__()
|
38
38
|
self._model = None
|
39
39
|
self._false_positive_reduction_steps = None
|
40
|
+
self._allowed_models = (
|
41
|
+
allowed_models if allowed_models is not None else set(_MODELS.keys())
|
42
|
+
)
|
40
43
|
|
41
44
|
@classmethod
|
42
45
|
def name(cls) -> str:
|
@@ -91,7 +94,12 @@ class ModelRouter(Model):
|
|
91
94
|
_FALSE_POSITIVE_REDUCTION_STEPS_KEY, 0, 5
|
92
95
|
)
|
93
96
|
model_name = trial.suggest_categorical(
|
94
|
-
"model",
|
97
|
+
"model",
|
98
|
+
[
|
99
|
+
k
|
100
|
+
for k, v in _MODELS.items()
|
101
|
+
if v.supports_x(df) and k in self._allowed_models
|
102
|
+
],
|
95
103
|
)
|
96
104
|
print(f"Using {model_name} model")
|
97
105
|
model = _MODELS[model_name]()
|
@@ -64,6 +64,7 @@ class Trainer(Fit):
|
|
64
64
|
max_train_timeout: datetime.timedelta | None = None,
|
65
65
|
cutoff_dt: datetime.datetime | None = None,
|
66
66
|
embedding_cols: list[list[str]] | None = None,
|
67
|
+
allowed_models: set[str] | None = None,
|
67
68
|
):
|
68
69
|
tqdm.tqdm.pandas()
|
69
70
|
|
@@ -155,6 +156,7 @@ class Trainer(Fit):
|
|
155
156
|
self._max_train_timeout = max_train_timeout
|
156
157
|
self._cutoff_dt = cutoff_dt
|
157
158
|
self.embedding_cols = embedding_cols
|
159
|
+
self._allowed_models = allowed_models
|
158
160
|
|
159
161
|
def _provide_study(self, column: str) -> optuna.Study:
|
160
162
|
storage_name = f"sqlite:///{self._folder}/{column}/{_STUDYDB_FILENAME}"
|
@@ -265,7 +267,7 @@ class Trainer(Fit):
|
|
265
267
|
print(f"Row weights took {time.time() - start_row_weights}")
|
266
268
|
|
267
269
|
# Create model
|
268
|
-
model = ModelRouter()
|
270
|
+
model = ModelRouter(self._allowed_models)
|
269
271
|
model.set_options(trial, x)
|
270
272
|
|
271
273
|
# Train
|
@@ -517,7 +519,7 @@ class Trainer(Fit):
|
|
517
519
|
reducer = CombinedReducer(self.embedding_cols)
|
518
520
|
reducer.load(folder)
|
519
521
|
|
520
|
-
model = ModelRouter()
|
522
|
+
model = ModelRouter(None)
|
521
523
|
model.load(folder)
|
522
524
|
|
523
525
|
selector = Selector(model)
|
@@ -570,7 +572,7 @@ class Trainer(Fit):
|
|
570
572
|
if not os.path.isdir(date_path):
|
571
573
|
continue
|
572
574
|
try:
|
573
|
-
model = ModelRouter()
|
575
|
+
model = ModelRouter(None)
|
574
576
|
model.load(date_path)
|
575
577
|
feature_importances[date_str] = model.feature_importances
|
576
578
|
except FileNotFoundError as exc:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: wavetrainer
|
3
|
-
Version: 0.1.7
|
3
|
+
Version: 0.1.9
|
4
4
|
Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
|
5
5
|
Home-page: https://github.com/8W9aG/wavetrainer
|
6
6
|
Author: Will Sackfield
|
@@ -28,6 +28,7 @@ Requires-Dist: xgboost>=3.0.0
|
|
28
28
|
Requires-Dist: jax>=0.6.1
|
29
29
|
Requires-Dist: tabpfn_extensions>=0.0.4
|
30
30
|
Requires-Dist: hyperopt>=0.2.7
|
31
|
+
Requires-Dist: pycaleva>=0.8.2
|
31
32
|
|
32
33
|
# wavetrainer
|
33
34
|
|
@@ -62,6 +63,7 @@ Python 3.11.6:
|
|
62
63
|
- [jax](https://github.com/jax-ml/jax)
|
63
64
|
- [tabpfn_extensions](https://github.com/PriorLabs/tabpfn-extensions)
|
64
65
|
- [hyperopt](https://github.com/hyperopt/hyperopt)
|
66
|
+
- [pycaleva](https://github.com/MartinWeigl/pycaleva)
|
65
67
|
|
66
68
|
## Raison D'être :thought_balloon:
|
67
69
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/catboost/catboost_classifier_wrap.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/catboost/catboost_regressor_wrap.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/non_categorical_numeric_columns.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|